diff options
695 files changed, 32366 insertions, 16601 deletions
diff --git a/.gitignore b/.gitignore index 9bb1cb6d825d..869e1a3b64b6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,10 @@ # subdirectories here. Add them in the ".gitignore" file # in that subdirectory instead. # +# NOTE! Please use 'git-ls-files -i --exclude-standard' +# command after changing this file, to see if there are +# any tracked files which get ignored after the change. +# # Normal rules # .* @@ -18,19 +22,21 @@ *.lst *.symtypes *.order +*.elf +*.bin +*.gz # # Top-level generic files # tags TAGS -vmlinux* -!vmlinux.lds.S -!vmlinux.lds.h +vmlinux System.map Module.markers Module.symvers !.gitignore +!.mailmap # # Generated include files @@ -2611,8 +2611,9 @@ S: Perth, Western Australia S: Australia N: Miguel Ojeda Sandonis -W: http://maxextreme.googlepages.com/ +W: http://miguelojeda.es +W: http://jair.lab.fi.uva.es/~migojed/ D: Author of the ks0108, cfag12864b and cfag12864bfb auxiliary display drivers. D: Maintainer of the auxiliary display drivers tree (drivers/auxdisplay/*) S: C/ Mieses 20, 9-B diff --git a/Documentation/ABI/testing/sysfs-firmware-memmap b/Documentation/ABI/testing/sysfs-firmware-memmap new file mode 100644 index 000000000000..0d99ee6ae02e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-memmap @@ -0,0 +1,71 @@ +What: /sys/firmware/memmap/ +Date: June 2008 +Contact: Bernhard Walle <[email protected]> +Description: + On all platforms, the firmware provides a memory map which the + kernel reads. The resources from that memory map are registered + in the kernel resource tree and exposed to userspace via + /proc/iomem (together with other resources). + + However, on most architectures that firmware-provided memory + map is modified afterwards by the kernel itself, either because + the kernel merges that memory map with other information or + just because the user overwrites that memory map via command + line. + + kexec needs the raw firmware-provided memory map to setup the + parameter segment of the kernel that should be booted with + kexec. Also, the raw memory map is useful for debugging. For + that reason, /sys/firmware/memmap is an interface that provides + the raw memory map to userspace. + + The structure is as follows: Under /sys/firmware/memmap there + are subdirectories with the number of the entry as their name: + + /sys/firmware/memmap/0 + /sys/firmware/memmap/1 + /sys/firmware/memmap/2 + /sys/firmware/memmap/3 + ... + + The maximum depends on the number of memory map entries provided + by the firmware. The order is just the order that the firmware + provides. + + Each directory contains three files: + + start : The start address (as hexadecimal number with the + '0x' prefix). + end : The end address, inclusive (regardless whether the + firmware provides inclusive or exclusive ranges). + type : Type of the entry as string. See below for a list of + valid types. + + So, for example: + + /sys/firmware/memmap/0/start + /sys/firmware/memmap/0/end + /sys/firmware/memmap/0/type + /sys/firmware/memmap/1/start + ... + + Currently following types exist: + + - System RAM + - ACPI Tables + - ACPI Non-volatile Storage + - reserved + + Following shell snippet can be used to display that memory + map in a human-readable format: + + -------------------- 8< ---------------------------------------- + #!/bin/bash + cd /sys/firmware/memmap + for dir in * ; do + start=$(cat $dir/start) + end=$(cat $dir/end) + type=$(cat $dir/type) + printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type" + done + -------------------- >8 ---------------------------------------- diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt index 8aa7529f8258..cd784f46bf8a 100644 --- a/Documentation/accounting/taskstats-struct.txt +++ b/Documentation/accounting/taskstats-struct.txt @@ -24,6 +24,8 @@ There are three different groups of fields in the struct taskstats: 4) Per-task and per-thread context switch count statistics +5) Time accounting for SMT machines + Future extension should add fields to the end of the taskstats struct, and should not change the relative position of each field within the struct. @@ -164,4 +166,8 @@ struct taskstats { __u64 nvcsw; /* Context voluntary switch counter */ __u64 nivcsw; /* Context involuntary switch counter */ +5) Time accounting for SMT machines + __u64 ac_utimescaled; /* utime scaled on frequency etc */ + __u64 ac_stimescaled; /* stime scaled on frequency etc */ + __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ } diff --git a/Documentation/auxdisplay/cfag12864b b/Documentation/auxdisplay/cfag12864b index b714183d4125..eb7be393a510 100644 --- a/Documentation/auxdisplay/cfag12864b +++ b/Documentation/auxdisplay/cfag12864b @@ -3,7 +3,7 @@ =================================== License: GPLv2 -Author & Maintainer: Miguel Ojeda Sandonis <[email protected]> +Author & Maintainer: Miguel Ojeda Sandonis Date: 2006-10-27 @@ -22,7 +22,7 @@ Date: 2006-10-27 1. DRIVER INFORMATION --------------------- -This driver support one cfag12864b display at time. +This driver supports a cfag12864b LCD. --------------------- diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c index 7bfac354d4c9..2caeea5e4993 100644 --- a/Documentation/auxdisplay/cfag12864b-example.c +++ b/Documentation/auxdisplay/cfag12864b-example.c @@ -4,7 +4,7 @@ * Description: cfag12864b LCD userspace example program * License: GPLv2 * - * Author: Copyright (C) Miguel Ojeda Sandonis <[email protected]> + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-31 * * This program is free software; you can redistribute it and/or modify diff --git a/Documentation/auxdisplay/ks0108 b/Documentation/auxdisplay/ks0108 index 92b03b60c613..8ddda0c8ceef 100644 --- a/Documentation/auxdisplay/ks0108 +++ b/Documentation/auxdisplay/ks0108 @@ -3,7 +3,7 @@ ========================================== License: GPLv2 -Author & Maintainer: Miguel Ojeda Sandonis <[email protected]> +Author & Maintainer: Miguel Ojeda Sandonis Date: 2006-10-27 @@ -21,7 +21,7 @@ Date: 2006-10-27 1. DRIVER INFORMATION --------------------- -This driver support the ks0108 LCD controller. +This driver supports the ks0108 LCD controller. --------------------- diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt index 824fc0274471..d9014aa0eb68 100644 --- a/Documentation/cgroups.txt +++ b/Documentation/cgroups.txt @@ -390,6 +390,10 @@ If you have several tasks to attach, you have to do it one after another: ... # /bin/echo PIDn > tasks +You can attach the current shell task by echoing 0: + +# echo 0 > tasks + 3. Kernel API ============= diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt index 4dcea42432c2..7cc6e6a60672 100644 --- a/Documentation/controllers/devices.txt +++ b/Documentation/controllers/devices.txt @@ -13,7 +13,7 @@ either an integer or * for all. Access is a composition of r The root device cgroup starts with rwm to 'all'. A child device cgroup gets a copy of the parent. Administrators can then remove devices from the whitelist or add new entries. A child cgroup can -never receive a device access which is denied its parent. However +never receive a device access which is denied by its parent. However when a device access is removed from a parent it will not also be removed from the child(ren). @@ -29,7 +29,11 @@ allows cgroup 1 to read and mknod the device usually known as echo a > /cgroups/1/devices.deny -will remove the default 'a *:* mrw' entry. +will remove the default 'a *:* rwm' entry. Doing + + echo a > /cgroups/1/devices.allow + +will add the 'a *:* rwm' entry to the whitelist. 3. Security diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 353504de3084..1f5a924d1e56 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -154,13 +154,15 @@ browsing and modifying the cpusets presently known to the kernel. No new system calls are added for cpusets - all support for querying and modifying cpusets is via this cpuset file system. -The /proc/<pid>/status file for each task has two added lines, +The /proc/<pid>/status file for each task has four added lines, displaying the tasks cpus_allowed (on which CPUs it may be scheduled) and mems_allowed (on which Memory Nodes it may obtain memory), -in the format seen in the following example: +in the two formats seen in the following example: Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff + Cpus_allowed_list: 0-127 Mems_allowed: ffffffff,ffffffff + Mems_allowed_list: 0-63 Each cpuset is represented by a directory in the cgroup file system containing (on top of the standard cgroup files) the following @@ -544,6 +546,9 @@ otherwise initial value -1 that indicates the cpuset has no request. ( 4 : search nodes in a chunk of node [on NUMA system] ) ( 5 : search system wide [on NUMA system] ) +The system default is architecture dependent. The system default +can be changed using the relax_domain_level= boot parameter. + This file is per-cpuset and affect the sched domain where the cpuset belongs to. Therefore if the flag 'sched_load_balance' of a cpuset is disabled, then 'sched_relax_domain_level' have no effect since diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5b3f31faed56..46ece3fba6f9 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -312,3 +312,12 @@ When: 2.6.26 Why: Implementation became generic; users should now include linux/semaphore.h instead. Who: Matthew Wilcox <[email protected]> + +--------------------------- + +What: CONFIG_THERMAL_HWMON +When: January 2009 +Why: This option was introduced just to allow older lm-sensors userspace + to keep working over the upgrade to 2.6.26. At the scheduled time of + removal fixed lm-sensors (2.x or 3.x) should be readily available. +Who: Rene Herman <[email protected]> diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index ee75cbace28d..d4cd4126d1ad 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -25,12 +25,23 @@ routines, and should be zero-initialized except for fields with data you provide. A client structure holds device-specific information like the driver model device node, and its I2C address. +/* iff driver uses driver model ("new style") binding model: */ + +static struct i2c_device_id foo_idtable[] = { + { "foo", my_id_for_foo }, + { "bar", my_id_for_bar }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, foo_idtable); + static struct i2c_driver foo_driver = { .driver = { .name = "foo", }, /* iff driver uses driver model ("new style") binding model: */ + .id_table = foo_ids, .probe = foo_probe, .remove = foo_remove, @@ -173,10 +184,9 @@ handle may be used during foo_probe(). If foo_probe() reports success (zero not a negative status code) it may save the handle and use it until foo_remove() returns. That binding model is used by most Linux drivers. -Drivers match devices when i2c_client.driver_name and the driver name are -the same; this approach is used in several other busses that don't have -device typing support in the hardware. The driver and module name should -match, so hotplug/coldplug mechanisms will modprobe the driver. +The probe function is called when an entry in the id_table name field +matches the device's name. It is passed the entry that was matched so +the driver knows which one in the table matched. Device Creation (Standard driver model) diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index b8e52c0355d3..9691c7f5166c 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -109,7 +109,7 @@ There are two possible methods of using Kdump. 2) Or use the system kernel binary itself as dump-capture kernel and there is no need to build a separate dump-capture kernel. This is possible only with the architecutres which support a relocatable kernel. As - of today i386 and ia64 architectures support relocatable kernel. + of today, i386, x86_64 and ia64 architectures support relocatable kernel. Building a relocatable kernel is advantageous from the point of view that one does not have to build a second kernel for capturing the dump. But diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e07c432c731f..795c487af8e4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -271,6 +271,17 @@ and is between 256 and 4096 characters. It is defined in the file aic79xx= [HW,SCSI] See Documentation/scsi/aic79xx.txt. + amd_iommu= [HW,X86-84] + Pass parameters to the AMD IOMMU driver in the system. + Possible values are: + isolate - enable device isolation (each device, as far + as possible, will get its own protection + domain) + amd_iommu_size= [HW,X86-64] + Define the size of the aperture for the AMD IOMMU + driver. Possible values are: + '32M', '64M' (default), '128M', '256M', '512M', '1G' + amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: <a>,<b> @@ -295,7 +306,7 @@ and is between 256 and 4096 characters. It is defined in the file when initialising the APIC and IO-APIC components. apm= [APM] Advanced Power Management - See header of arch/i386/kernel/apm.c. + See header of arch/x86/kernel/apm_32.c. arcrimi= [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards Format: <io>,<irq>,<nodeID> @@ -599,6 +610,29 @@ and is between 256 and 4096 characters. It is defined in the file See drivers/char/README.epca and Documentation/digiepca.txt. + disable_mtrr_cleanup [X86] + enable_mtrr_cleanup [X86] + The kernel tries to adjust MTRR layout from continuous + to discrete, to make X server driver able to add WB + entry later. This parameter enables/disables that. + + mtrr_chunk_size=nn[KMG] [X86] + used for mtrr cleanup. It is largest continous chunk + that could hold holes aka. UC entries. + + mtrr_gran_size=nn[KMG] [X86] + Used for mtrr cleanup. It is granularity of mtrr block. + Default is 1. + Large value could prevent small alignment from + using up MTRRs. + + mtrr_spare_reg_nr=n [X86] + Format: <integer> + Range: 0,7 : spare reg number + Default : 1 + Used for mtrr cleanup. It is spare mtrr entries number. + Set to 2 or more if your graphical card needs more. + disable_mtrr_trim [X86, Intel and AMD only] By default the kernel will trim any uncacheable memory out of your available memory pool based on @@ -638,7 +672,7 @@ and is between 256 and 4096 characters. It is defined in the file elanfreq= [X86-32] See comment before function elanfreq_setup() in - arch/i386/kernel/cpu/cpufreq/elanfreq.c. + arch/x86/kernel/cpu/cpufreq/elanfreq.c. elevator= [IOSCHED] Format: {"anticipatory" | "cfq" | "deadline" | "noop"} @@ -1679,6 +1713,10 @@ and is between 256 and 4096 characters. It is defined in the file Format: <reboot_mode>[,<reboot_mode2>[,...]] See arch/*/kernel/reboot.c or arch/*/kernel/process.c + relax_domain_level= + [KNL, SMP] Set scheduler's default relax_domain_level. + See Documentation/cpusets.txt. + reserve= [KNL,BUGS] Force the kernel to ignore some iomem area reservetop= [X86-32] @@ -2112,6 +2150,9 @@ and is between 256 and 4096 characters. It is defined in the file usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. + add_efi_memmap [EFI; x86-32,X86-64] Include EFI memory map in + kernel's map of available physical RAM. + vdso= [X86-32,SH,x86-64] vdso=2: enable compat VDSO (default with COMPAT_VDSO) vdso=1: enable VDSO (default) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17a6e46fbd43..17f1f91af35c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -81,23 +81,23 @@ inet_peer_minttl - INTEGER Minimum time-to-live of entries. Should be enough to cover fragment time-to-live on the reassembling side. This minimum time-to-live is guaranteed if the pool size is less than inet_peer_threshold. - Measured in jiffies(1). + Measured in seconds. inet_peer_maxttl - INTEGER Maximum time-to-live of entries. Unused entries will expire after this period of time if there is no memory pressure on the pool (i.e. when the number of entries in the pool is very small). - Measured in jiffies(1). + Measured in seconds. inet_peer_gc_mintime - INTEGER Minimum interval between garbage collection passes. This interval is in effect under high memory pressure on the pool. - Measured in jiffies(1). + Measured in seconds. inet_peer_gc_maxtime - INTEGER Minimum interval between garbage collection passes. This interval is in effect under low (or absent) memory pressure on the pool. - Measured in jiffies(1). + Measured in seconds. TCP variables: @@ -794,10 +794,6 @@ tag - INTEGER Allows you to write a number, which can be used as required. Default value is 0. -(1) Jiffie: internal timeunit for the kernel. On the i386 1/100s, on the -Alpha 1/1024s. See the HZ define in /usr/include/asm/param.h for the exact -value on your system. - Alexey Kuznetsov. diff --git a/Documentation/networking/s2io.txt b/Documentation/networking/s2io.txt index 4bde53e85f3f..1e28e2ddb90a 100644 --- a/Documentation/networking/s2io.txt +++ b/Documentation/networking/s2io.txt @@ -83,9 +83,9 @@ Valid range: Limited by memory on system Default: 30 e. intr_type -Specifies interrupt type. Possible values 1(INTA), 2(MSI), 3(MSI-X) -Valid range: 1-3 -Default: 1 +Specifies interrupt type. Possible values 0(INTA), 2(MSI-X) +Valid values: 0, 2 +Default: 2 5. Performance suggestions General: diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt index 757c729ee42e..90aa4531cb67 100644 --- a/Documentation/nmi_watchdog.txt +++ b/Documentation/nmi_watchdog.txt @@ -10,7 +10,7 @@ us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt which get executed even if the system is otherwise locked up hard). This can be used to debug hard kernel lockups. By executing periodic NMI interrupts, the kernel can monitor whether any CPU has locked up, -and print out debugging messages if so. +and print out debugging messages if so. In order to use the NMI watchdog, you need to have APIC support in your kernel. For SMP kernels, APIC support gets compiled in automatically. For @@ -22,8 +22,7 @@ CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain kernel debugging options, such as Kernel Stack Meter or Kernel Tracer, may implicitly disable the NMI watchdog.] -For x86-64, the needed APIC is always compiled in, and the NMI watchdog is -always enabled with I/O-APIC mode (nmi_watchdog=1). +For x86-64, the needed APIC is always compiled in. Using local APIC (nmi_watchdog=2) needs the first performance register, so you can't use it for other purposes (such as high precision performance @@ -63,16 +62,15 @@ when the system is idle), but if your system locks up on anything but the "hlt", then you are out of luck -- the event will not happen at all and the watchdog won't trigger. This is a shortcoming of the local APIC watchdog -- unfortunately there is no "clock ticks" event that would work all the -time. The I/O APIC watchdog is driven externally and has no such shortcoming. +time. The I/O APIC watchdog is driven externally and has no such shortcoming. But its NMI frequency is much higher, resulting in a more significant hit to the overall system performance. -NOTE: starting with 2.4.2-ac18 the NMI-oopser is disabled by default, -you have to enable it with a boot time parameter. Prior to 2.4.2-ac18 -the NMI-oopser is enabled unconditionally on x86 SMP boxes. +On x86 nmi_watchdog is disabled by default so you have to enable it with +a boot time parameter. -On x86-64 the NMI oopser is on by default. On 64bit Intel CPUs -it uses IO-APIC by default and on AMD it uses local APIC. +NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally +on x86 SMP boxes. [ feel free to send bug reports, suggestions and patches to Ingo Molnar <[email protected]> or the Linux SMP mailing diff --git a/Documentation/video4linux/CARDLIST.au0828 b/Documentation/video4linux/CARDLIST.au0828 index aaae360312e4..86d1c8e7b18f 100644 --- a/Documentation/video4linux/CARDLIST.au0828 +++ b/Documentation/video4linux/CARDLIST.au0828 @@ -1,4 +1,4 @@ 0 -> Unknown board (au0828) - 1 -> Hauppauge HVR950Q (au0828) [2040:7200] + 1 -> Hauppauge HVR950Q (au0828) [2040:7200,2040:7210,2040:7217,2040:721b,2040:721f,2040:7280,0fd9:0008] 2 -> Hauppauge HVR850 (au0828) [2040:7240] 3 -> DViCO FusionHDTV USB (au0828) [0fe9:d620] diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index e4230ed16ee7..df3227605d59 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c @@ -1,7 +1,7 @@ /* * Slabinfo: Tool to get reports about slabs * - * (C) 2007 sgi, Christoph Lameter <[email protected]> + * (C) 2007 sgi, Christoph Lameter * * Compile by: * @@ -99,7 +99,7 @@ void fatal(const char *x, ...) void usage(void) { - printf("slabinfo 5/7/2007. (c) 2007 sgi. [email protected]\n\n" + printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n" "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" "-a|--aliases Show aliases\n" "-A|--activity Most active slabs first\n" diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index 7c13f22a0c9e..bb1f5c6e28b3 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt @@ -266,4 +266,4 @@ of other objects. slub_debug=FZ,dentry -Christoph Lameter, <[email protected]>, May 30, 2007 +Christoph Lameter, May 30, 2007 diff --git a/Documentation/i386/IO-APIC.txt b/Documentation/x86/i386/IO-APIC.txt index 30b4c714fbe1..30b4c714fbe1 100644 --- a/Documentation/i386/IO-APIC.txt +++ b/Documentation/x86/i386/IO-APIC.txt diff --git a/Documentation/i386/boot.txt b/Documentation/x86/i386/boot.txt index 95ad15c3b01f..147bfe511cdd 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/x86/i386/boot.txt @@ -1,17 +1,14 @@ - THE LINUX/I386 BOOT PROTOCOL - ---------------------------- + THE LINUX/x86 BOOT PROTOCOL + --------------------------- - H. Peter Anvin <[email protected]> - Last update 2007-05-23 - -On the i386 platform, the Linux kernel uses a rather complicated boot +On the x86 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as well as the desire in the early days to have the kernel itself be a bootable image, the complicated PC memory model and due to changed expectations in the PC industry caused by the effective demise of real-mode DOS as a mainstream operating system. -Currently, the following versions of the Linux/i386 boot protocol exist. +Currently, the following versions of the Linux/x86 boot protocol exist. Old kernels: zImage/Image support only. Some very early kernels may not even support a command line. @@ -372,10 +369,17 @@ Protocol: 2.00+ - If 0, the protected-mode code is loaded at 0x10000. - If 1, the protected-mode code is loaded at 0x100000. + Bit 5 (write): QUIET_FLAG + - If 0, print early messages. + - If 1, suppress early messages. + This requests to the kernel (decompressor and early + kernel) to not write early messages that require + accessing the display hardware directly. + Bit 6 (write): KEEP_SEGMENTS Protocol: 2.07+ - - if 0, reload the segment registers in the 32bit entry point. - - if 1, do not reload the segment registers in the 32bit entry point. + - If 0, reload the segment registers in the 32bit entry point. + - If 1, do not reload the segment registers in the 32bit entry point. Assume that %cs %ds %ss %es are all set to flat segments with a base of 0 (or the equivalent for their environment). @@ -504,7 +508,7 @@ Protocol: 2.06+ maximum size was 255. Field name: hardware_subarch -Type: write +Type: write (optional, defaults to x86/PC) Offset/size: 0x23c/4 Protocol: 2.07+ @@ -520,11 +524,13 @@ Protocol: 2.07+ 0x00000002 Xen Field name: hardware_subarch_data -Type: write +Type: write (subarch-dependent) Offset/size: 0x240/8 Protocol: 2.07+ A pointer to data that is specific to hardware subarch + This field is currently unused for the default x86/PC environment, + do not modify. Field name: payload_offset Type: read @@ -545,6 +551,34 @@ Protocol: 2.08+ The length of the payload. +Field name: setup_data +Type: write (special) +Offset/size: 0x250/8 +Protocol: 2.09+ + + The 64-bit physical pointer to NULL terminated single linked list of + struct setup_data. This is used to define a more extensible boot + parameters passing mechanism. The definition of struct setup_data is + as follow: + + struct setup_data { + u64 next; + u32 type; + u32 len; + u8 data[0]; + }; + + Where, the next is a 64-bit physical pointer to the next node of + linked list, the next field of the last node is 0; the type is used + to identify the contents of data; the len is the length of data + field; the data holds the real payload. + + This list may be modified at a number of points during the bootup + process. Therefore, when modifying this list one should always make + sure to consider the case where the linked list already contains + entries. + + **** THE IMAGE CHECKSUM From boot protocol version 2.08 onwards the CRC-32 is calculated over @@ -553,6 +587,7 @@ initial remainder of 0xffffffff. The checksum is appended to the file; therefore the CRC of the file up to the limit specified in the syssize field of the header is always 0. + **** THE KERNEL COMMAND LINE The kernel command line has become an important way for the boot @@ -584,28 +619,6 @@ command line is entered using the following protocol: covered by setup_move_size, so you may need to adjust this field. -Field name: setup_data -Type: write (obligatory) -Offset/size: 0x250/8 -Protocol: 2.09+ - - The 64-bit physical pointer to NULL terminated single linked list of - struct setup_data. This is used to define a more extensible boot - parameters passing mechanism. The definition of struct setup_data is - as follow: - - struct setup_data { - u64 next; - u32 type; - u32 len; - u8 data[0]; - }; - - Where, the next is a 64-bit physical pointer to the next node of - linked list, the next field of the last node is 0; the type is used - to identify the contents of data; the len is the length of data - field; the data holds the real payload. - **** MEMORY LAYOUT OF THE REAL-MODE CODE diff --git a/Documentation/i386/usb-legacy-support.txt b/Documentation/x86/i386/usb-legacy-support.txt index 1894cdfc69d9..1894cdfc69d9 100644 --- a/Documentation/i386/usb-legacy-support.txt +++ b/Documentation/x86/i386/usb-legacy-support.txt diff --git a/Documentation/i386/zero-page.txt b/Documentation/x86/i386/zero-page.txt index 169ad423a3d1..169ad423a3d1 100644 --- a/Documentation/i386/zero-page.txt +++ b/Documentation/x86/i386/zero-page.txt diff --git a/Documentation/x86_64/00-INDEX b/Documentation/x86/x86_64/00-INDEX index 92fc20ab5f0e..92fc20ab5f0e 100644 --- a/Documentation/x86_64/00-INDEX +++ b/Documentation/x86/x86_64/00-INDEX diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index b0c7b6c4abda..b0c7b6c4abda 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt diff --git a/Documentation/x86_64/cpu-hotplug-spec b/Documentation/x86/x86_64/cpu-hotplug-spec index 3c23e0587db3..3c23e0587db3 100644 --- a/Documentation/x86_64/cpu-hotplug-spec +++ b/Documentation/x86/x86_64/cpu-hotplug-spec diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets index d1a985c5b00a..d1a985c5b00a 100644 --- a/Documentation/x86_64/fake-numa-for-cpusets +++ b/Documentation/x86/x86_64/fake-numa-for-cpusets diff --git a/Documentation/x86_64/kernel-stacks b/Documentation/x86/x86_64/kernel-stacks index 5ad65d51fb95..5ad65d51fb95 100644 --- a/Documentation/x86_64/kernel-stacks +++ b/Documentation/x86/x86_64/kernel-stacks diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck index a05e58e7b159..a05e58e7b159 100644 --- a/Documentation/x86_64/machinecheck +++ b/Documentation/x86/x86_64/machinecheck diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index b89b6d2bebfa..efce75097369 100644 --- a/Documentation/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -11,9 +11,8 @@ ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... -ffffffff80000000 - ffffffff82800000 (=40 MB) kernel text mapping, from phys 0 -... unused hole ... -ffffffff88000000 - fffffffffff00000 (=1919 MB) module mapping space +ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 +ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space The direct mapping covers all memory in the system up to the highest memory address (this means in some cases it can also include PCI memory diff --git a/Documentation/x86_64/uefi.txt b/Documentation/x86/x86_64/uefi.txt index 7d77120a5184..a5e2b4fdb170 100644 --- a/Documentation/x86_64/uefi.txt +++ b/Documentation/x86/x86_64/uefi.txt @@ -36,3 +36,7 @@ Mechanics: services. noefi turn off all EFI runtime services reboot_type=k turn off EFI reboot runtime service +- If the EFI memory map has additional entries not in the E820 map, + you can include those entries in the kernels memory map of available + physical RAM by using the following kernel command line parameter. + add_efi_memmap include EFI memory map of available physical RAM diff --git a/MAINTAINERS b/MAINTAINERS index 8f0ec46a7096..c94d038cea33 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -376,6 +376,12 @@ L: [email protected] (moderated for non-subscribers) W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html S: Supported +AMD IOMMU (AMD-VI) +P: Joerg Roedel +S: Supported + AMS (Apple Motion Sensor) DRIVER P: Stelian Pop @@ -763,9 +769,10 @@ S: Maintained AUXILIARY DISPLAY DRIVERS P: Miguel Ojeda Sandonis -W: http://auxdisplay.googlepages.com/ +W: http://miguelojeda.es/auxdisplay.htm +W: http://jair.lab.fi.uva.es/~migojed/auxdisplay.htm S: Maintained AVR32 ARCHITECTURE @@ -1055,16 +1062,18 @@ S: Supported CFAG12864B LCD DRIVER P: Miguel Ojeda Sandonis -W: http://auxdisplay.googlepages.com/ +W: http://miguelojeda.es/auxdisplay.htm +W: http://jair.lab.fi.uva.es/~migojed/auxdisplay.htm S: Maintained CFAG12864BFB LCD FRAMEBUFFER DRIVER P: Miguel Ojeda Sandonis -W: http://auxdisplay.googlepages.com/ +W: http://miguelojeda.es/auxdisplay.htm +W: http://jair.lab.fi.uva.es/~migojed/auxdisplay.htm S: Maintained CFG80211 and NL80211 @@ -1420,6 +1429,14 @@ M: [email protected] S: Supported +DOCUMENTATION (/Documentation directory) +P: Michael Kerrisk +P: Randy Dunlap +S: Maintained + DOUBLETALK DRIVER P: James R. Van Zandt @@ -1626,13 +1643,13 @@ S: Maintained EXT3 FILE SYSTEM P: Stephen Tweedie, Andrew Morton S: Maintained EXT4 FILE SYSTEM P: Stephen Tweedie, Andrew Morton S: Maintained @@ -2428,9 +2445,10 @@ S: Maintained KS0108 LCD CONTROLLER DRIVER P: Miguel Ojeda Sandonis -W: http://auxdisplay.googlepages.com/ +W: http://miguelojeda.es/auxdisplay.htm +W: http://jair.lab.fi.uva.es/~migojed/auxdisplay.htm S: Maintained LAPB module @@ -2474,9 +2492,11 @@ M: [email protected] W: http://www.hansenpartnership.com/voyager S: Maintained -LINUX FOR POWERPC +LINUX FOR POWERPC (32-BIT AND 64-BIT) P: Paul Mackerras +P: Benjamin Herrenschmidt W: http://www.penguinppc.org/ T: git kernel.org:/pub/scm/linux/kernel/git/paulus/powerpc.git @@ -2516,13 +2536,6 @@ W: http://wiki.secretlab.ca/index.php/Linux_on_Xilinx_Virtex S: Maintained -LINUX FOR POWERPC BOOT CODE -P: Tom Rini -W: http://www.penguinppc.org/ -S: Maintained - LINUX FOR POWERPC EMBEDDED PPC8XX P: Vitaly Bordug @@ -2551,17 +2564,6 @@ P: Arnaldo Carvalho de Melo S: Maintained -LINUX FOR 64BIT POWERPC -P: Paul Mackerras -P: Anton Blanchard -W: http://www.penguinppc.org/ppc64/ -S: Supported - LINUX SECURITY MODULE (LSM) FRAMEWORK P: Chris Wright @@ -2680,8 +2682,8 @@ S: Supported MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 P: Michael Kerrisk -W: ftp://ftp.kernel.org/pub/linux/docs/manpages -S: Maintained +W: http://www.kernel.org/doc/man-pages +S: Supported MARVELL LIBERTAS WIRELESS DRIVER P: Dan Williams @@ -2814,6 +2816,12 @@ W: https://tango.0pointer.de/mailman/listinfo/s270-linux W: http://0pointer.de/lennart/tchibo.html S: Maintained +MULTIFUNCTION DEVICES (MFD) +P: Samuel Ortiz +S: Supported + MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM P: Pierre Ossman @@ -3195,8 +3203,8 @@ L: [email protected] S: Maintained PER-TASK DELAY ACCOUNTING -P: Shailabh Nagar +P: Balbir Singh S: Maintained @@ -3688,7 +3696,7 @@ S: Maintained SLAB ALLOCATOR P: Christoph Lameter P: Pekka Enberg P: Matt Mackall @@ -3898,8 +3906,8 @@ M: [email protected] S: Maintained TASKSTATS STATISTICS INTERFACE -P: Shailabh Nagar +P: Balbir Singh S: Maintained @@ -3995,7 +4003,8 @@ W: http://www.buzzard.org.uk/toshiba/ S: Maintained TPM DEVICE DRIVER -P: Kylene Hall +P: Debora Velarde +P: Rajiv Andrade W: http://tpmdd.sourceforge.net P: Marcel Selhorst @@ -4314,6 +4323,14 @@ L: [email protected] W: http://www.linux-usb.org/usbnet S: Maintained +USB VIDEO CLASS +P: Laurent Pinchart +W: http://linux-uvc.berlios.de +S: Maintained + USB W996[87]CF DRIVER P: Luca Risolia @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 26 -EXTRAVERSION = -rc8 +EXTRAVERSION = -rc9 NAME = Rotary Wombat # *DOCUMENTATION* diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index f13249be17c5..ef37fc1acaea 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -25,6 +25,13 @@ SECTIONS } :kernel _etext = .; /* End of text section */ + NOTES :kernel :note + .dummy : { + *(.dummy) + } :kernel + + RODATA + /* Exception table */ . = ALIGN(16); __ex_table : { @@ -33,13 +40,6 @@ SECTIONS __stop___ex_table = .; } - NOTES :kernel :note - .dummy : { - *(.dummy) - } :kernel - - RODATA - /* Will be freed after init */ . = ALIGN(PAGE_SIZE); /* Init code and data */ diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 52fc6a883281..2744673314b4 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -650,7 +650,8 @@ EXPORT_SYMBOL(dma_map_sg); EXPORT_SYMBOL(dma_unmap_sg); EXPORT_SYMBOL(dma_sync_single_for_cpu); EXPORT_SYMBOL(dma_sync_single_for_device); -EXPORT_SYMBOL(dma_sync_sg); +EXPORT_SYMBOL(dma_sync_sg_for_cpu); +EXPORT_SYMBOL(dma_sync_sg_for_device); EXPORT_SYMBOL(dmabounce_register_dev); EXPORT_SYMBOL(dmabounce_unregister_dev); diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 02cede295e89..dbf68dc50ae2 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -42,7 +42,7 @@ #define GPMC_STATUS 0x54 #define GPMC_PREFETCH_CONFIG1 0x1e0 #define GPMC_PREFETCH_CONFIG2 0x1e4 -#define GPMC_PREFETCH_CONTROL 0x1e8 +#define GPMC_PREFETCH_CONTROL 0x1ec #define GPMC_PREFETCH_STATUS 0x1f0 #define GPMC_ECC_CONFIG 0x1f4 #define GPMC_ECC_CONTROL 0x1f8 diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index 3e57428affee..8e813ed57519 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { + trace_hardirqs_off(); + /* * the primary core may have used a "cross call" soft interrupt * to get this processor out of WFI in the BootMonitor - make diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index c00eda588cd8..39c637b0ffea 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -501,8 +501,6 @@ static inline void omap_enable_channel_irq(int lch) /* Enable some nice interrupts. */ OMAP_DMA_CICR_REG(lch) = dma_chan[lch].enabled_irqs; - - dma_chan[lch].flags |= OMAP_DMA_ACTIVE; } static void omap_disable_channel_irq(int lch) diff --git a/arch/blackfin/kernel/cplb-nompu/cplbinit.c b/arch/blackfin/kernel/cplb-nompu/cplbinit.c index 917325bfbd84..6be0c50122e8 100644 --- a/arch/blackfin/kernel/cplb-nompu/cplbinit.c +++ b/arch/blackfin/kernel/cplb-nompu/cplbinit.c @@ -254,7 +254,8 @@ close_cplbtab(struct cplb_tab *table) } /* helper function */ -static void __fill_code_cplbtab(struct cplb_tab *t, int i, u32 a_start, u32 a_end) +static void __init +__fill_code_cplbtab(struct cplb_tab *t, int i, u32 a_start, u32 a_end) { if (cplb_data[i].psize) { fill_cplbtab(t, @@ -291,7 +292,8 @@ static void __fill_code_cplbtab(struct cplb_tab *t, int i, u32 a_start, u32 a_en } } -static void __fill_data_cplbtab(struct cplb_tab *t, int i, u32 a_start, u32 a_end) +static void __init +__fill_data_cplbtab(struct cplb_tab *t, int i, u32 a_start, u32 a_end) { if (cplb_data[i].psize) { fill_cplbtab(t, diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index 73647c158774..07402f57c9de 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -60,9 +60,14 @@ static struct irq_chip bad_chip = { }; static struct irq_desc bad_irq_desc = { + .status = IRQ_DISABLED, .chip = &bad_chip, .handle_irq = handle_bad_irq, .depth = 1, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), +#ifdef CONFIG_SMP + .affinity = CPU_MASK_ALL +#endif }; int show_interrupts(struct seq_file *p, void *v) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4ae15c8c2488..632cda8f2e76 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -547,7 +547,8 @@ setup_arch (char **cmdline_p) # ifdef CONFIG_ACPI_NUMA acpi_numa_init(); per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ? - 32 : cpus_weight(early_cpu_possible_map)), additional_cpus); + 32 : cpus_weight(early_cpu_possible_map)), + additional_cpus > 0 ? additional_cpus : 0); # endif #else # ifdef CONFIG_SMP diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 8c73643f2d66..aad1b7b1fff9 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -117,6 +117,7 @@ void account_system_vtime(struct task_struct *tsk) local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(account_system_vtime); /* * Called from the timer interrupt handler to charge accumulated user time diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index e5a7c5d96364..24c5dee91768 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1006,7 +1006,7 @@ config BOOT_ELF32 config MIPS_L1_CACHE_SHIFT int default "4" if MACH_DECSTATION - default "7" if SGI_IP27 || SGI_IP28 || SNI_RM + default "7" if SGI_IP22 || SGI_IP27 || SGI_IP28 || SNI_RM default "4" if PMC_MSP4200_EVAL default "5" diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c index 795cb8fb0d74..b5fc4eb412d2 100644 --- a/arch/mips/kernel/cevt-txx9.c +++ b/arch/mips/kernel/cevt-txx9.c @@ -161,6 +161,9 @@ void __init txx9_tmr_init(unsigned long baseaddr) struct txx9_tmr_reg __iomem *tmrptr; tmrptr = ioremap(baseaddr, sizeof(struct txx9_tmr_reg)); + /* Start once to make CounterResetEnable effective */ + __raw_writel(TXx9_TMTCR_CRE | TXx9_TMTCR_TCE, &tmrptr->tcr); + /* Stop and reset the counter */ __raw_writel(TXx9_TMTCR_CRE, &tmrptr->tcr); __raw_writel(0, &tmrptr->tisr); __raw_writel(0xffffffff, &tmrptr->cpra); diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c index b0ea0e43ba48..0d6b6663d5f6 100644 --- a/arch/mips/sgi-ip32/ip32-irq.c +++ b/arch/mips/sgi-ip32/ip32-irq.c @@ -425,6 +425,11 @@ static void ip32_irq0(void) BUILD_BUG_ON(MACEISA_SERIAL2_RDMAOR_IRQ - MACEISA_AUDIO_SW_IRQ != 31); crime_int = crime->istat & crime_mask; + + /* crime sometime delivers spurious interrupts, ignore them */ + if (unlikely(crime_int == 0)) + return; + irq = MACE_VID_IN1_IRQ + __ffs(crime_int); if (crime_int & CRIME_MACEISA_INT_MASK) { diff --git a/arch/mn10300/kernel/mn10300_ksyms.c b/arch/mn10300/kernel/mn10300_ksyms.c index 6d19628634e3..f9eb9753a404 100644 --- a/arch/mn10300/kernel/mn10300_ksyms.c +++ b/arch/mn10300/kernel/mn10300_ksyms.c @@ -10,8 +10,11 @@ */ #include <linux/module.h> #include <asm/uaccess.h> +#include <asm/pgtable.h> +EXPORT_SYMBOL(empty_zero_page); + EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(test_and_change_bit); @@ -31,7 +34,9 @@ extern u64 __ashrdi3(u64, unsigned); extern u64 __ashldi3(u64, unsigned); extern u64 __lshrdi3(u64, unsigned); extern s64 __negdi2(s64); +extern int __ucmpdi2(u64, u64); EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__lshrdi3); EXPORT_SYMBOL(__negdi2); +EXPORT_SYMBOL(__ucmpdi2); diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 9c623c88387b..b28c9a60445b 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -153,6 +153,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); /* * free current thread data structures etc.. diff --git a/arch/mn10300/lib/Makefile b/arch/mn10300/lib/Makefile index fdfa9ec5b5bb..0cd2346f4c13 100644 --- a/arch/mn10300/lib/Makefile +++ b/arch/mn10300/lib/Makefile @@ -4,4 +4,4 @@ lib-y = delay.o usercopy.o checksum.o bitops.o memcpy.o memmove.o memset.o lib-y += do_csum.o -lib-y += __ashldi3.o __ashrdi3.o __lshrdi3.o negdi2.o +lib-y += __ashldi3.o __ashrdi3.o __lshrdi3.o negdi2.o __ucmpdi2.o diff --git a/arch/mn10300/lib/__ucmpdi2.S b/arch/mn10300/lib/__ucmpdi2.S new file mode 100644 index 000000000000..60dcbdfe386c --- /dev/null +++ b/arch/mn10300/lib/__ucmpdi2.S @@ -0,0 +1,43 @@ +/* __ucmpdi2.S: 64-bit unsigned compare + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells ([email protected]) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + + .text + .p2align 4 + +############################################################################### +# +# int __ucmpdi2(unsigned long long a [D0:D1], +# unsigned long long b [(SP,12),(SP,16)]) +# +# - returns 0, 1, or 2 as a <, =, > b respectively. +# +############################################################################### + .globl __ucmpdi2 + .type __ucmpdi2,@function +__ucmpdi2: + mov (12,sp),a0 # b.lsw + mov (16,sp),a1 # b.msw + + sub a0,d0 + subc a1,d1 # may clear Z, never sets it + bne __ucmpdi2_differ # a.msw != b.msw + mov +1,d0 + rets + +__ucmpdi2_differ: + # C flag is set if LE, clear if GE + subc d0,d0 # -1 if LE, 0 if GE + add +1,d0 # 0 if LE, 1 if GE + add d0,d0 # 0 if LE, 2 if GE + rets + + .size __ucmpdi2, .-__ucmpdi2 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 1cee2f9fdf06..095e04db1c0e 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -273,7 +273,8 @@ endif initrd- := $(patsubst zImage%, zImage.initrd%, $(image-n) $(image-)) initrd-y := $(patsubst zImage%, zImage.initrd%, \ $(patsubst dtbImage%, dtbImage.initrd%, \ - $(patsubst treeImage%, treeImage.initrd%, $(image-y)))) + $(patsubst simpleImage%, simpleImage.initrd%, \ + $(patsubst treeImage%, treeImage.initrd%, $(image-y))))) initrd-y := $(filter-out $(image-y), $(initrd-y)) targets += $(image-y) $(initrd-y) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 61dd17449ddc..cf37f5ca4b71 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -136,6 +136,11 @@ static int __init add_legacy_soc_port(struct device_node *np, if (of_get_property(np, "clock-frequency", NULL) == NULL) return -1; + /* if reg-shift or offset, don't try to use it */ + if ((of_get_property(np, "reg-shift", NULL) != NULL) || + (of_get_property(np, "reg-offset", NULL) != NULL)) + return -1; + /* if rtas uses this device, don't try to use it as well */ if (of_get_property(np, "used-by-rtas", NULL) != NULL) return -1; diff --git a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c index 41c7fd91e99e..fe92e65103ed 100644 --- a/arch/powerpc/platforms/52xx/lite5200_pm.c +++ b/arch/powerpc/platforms/52xx/lite5200_pm.c @@ -14,6 +14,7 @@ static struct mpc52xx_sdma __iomem *bes; static struct mpc52xx_xlb __iomem *xlb; static struct mpc52xx_gpio __iomem *gps; static struct mpc52xx_gpio_wkup __iomem *gpw; +static void __iomem *pci; static void __iomem *sram; static const int sram_size = 0x4000; /* 16 kBytes */ static void __iomem *mbar; @@ -50,6 +51,8 @@ static int lite5200_pm_prepare(void) { .type = "builtin", .compatible = "mpc5200", }, /* efika */ {} }; + u64 regaddr64 = 0; + const u32 *regaddr_p; /* deep sleep? let mpc52xx code handle that */ if (lite5200_pm_target_state == PM_SUSPEND_STANDBY) @@ -60,8 +63,12 @@ static int lite5200_pm_prepare(void) /* map registers */ np = of_find_matching_node(NULL, immr_ids); - mbar = of_iomap(np, 0); + regaddr_p = of_get_address(np, 0, NULL, NULL); + if (regaddr_p) + regaddr64 = of_translate_address(np, regaddr_p); of_node_put(np); + + mbar = ioremap((u32) regaddr64, 0xC000); if (!mbar) { printk(KERN_ERR "%s:%i Error mapping registers\n", __func__, __LINE__); return -ENOSYS; @@ -71,6 +78,7 @@ static int lite5200_pm_prepare(void) pic = mbar + 0x500; gps = mbar + 0xb00; gpw = mbar + 0xc00; + pci = mbar + 0xd00; bes = mbar + 0x1200; xlb = mbar + 0x1f00; sram = mbar + 0x8000; @@ -85,6 +93,7 @@ static struct mpc52xx_sdma sbes; static struct mpc52xx_xlb sxlb; static struct mpc52xx_gpio sgps; static struct mpc52xx_gpio_wkup sgpw; +static char spci[0x200]; static void lite5200_save_regs(void) { @@ -94,6 +103,7 @@ static void lite5200_save_regs(void) _memcpy_fromio(&sxlb, xlb, sizeof(*xlb)); _memcpy_fromio(&sgps, gps, sizeof(*gps)); _memcpy_fromio(&sgpw, gpw, sizeof(*gpw)); + _memcpy_fromio(spci, pci, 0x200); _memcpy_fromio(saved_sram, sram, sram_size); } @@ -103,6 +113,8 @@ static void lite5200_restore_regs(void) int i; _memcpy_toio(sram, saved_sram, sram_size); + /* PCI Configuration */ + _memcpy_toio(pci, spci, 0x200); /* * GPIOs. Interrupt Master Enable has higher address then other diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e0edaaa6920a..bb0c0d0f6db7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE def_bool y config HAVE_SETUP_PER_CPU_AREA - def_bool X86_64 || (X86_SMP && !X86_VOYAGER) + def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP @@ -230,6 +230,26 @@ config SMP If you don't know what to do here, say N. +config X86_FIND_SMP_CONFIG + def_bool y + depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS + +if ACPI +config X86_MPPARSE + def_bool y + bool "Enable MPS table" + depends on X86_LOCAL_APIC && !X86_VISWS + help + For old smp systems that do not have proper acpi support. Newer systems + (esp with 64bit cpus) with acpi support, MADT and DSDT will override it +endif + +if !ACPI +config X86_MPPARSE + def_bool y + depends on X86_LOCAL_APIC && !X86_VISWS +endif + choice prompt "Subarchitecture Type" default X86_PC @@ -251,7 +271,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && (SMP || BROKEN) + depends on X86_32 && (SMP || BROKEN) && !PCI help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. @@ -261,39 +281,9 @@ config X86_VOYAGER If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. -config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 - select NUMA - help - This option is used for getting Linux to run on a (IBM/Sequent) NUMA - multiquad box. This changes the way that processors are bootstrapped, - and uses Clustered Logical APIC addressing mode instead of Flat Logical. - You will need a new lynxer.elf file to flash your firmware with - send - email to <[email protected]>. - -config X86_SUMMIT - bool "Summit/EXA (IBM x440)" - depends on X86_32 && SMP - help - This option is needed for IBM systems that use the Summit/EXA chipset. - In particular, it is needed for the x440. - - If you don't have one of these computers, you should say N here. - If you want to build a NUMA kernel, you must select ACPI. - -config X86_BIGSMP - bool "Support for other sub-arch SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - - If you don't have such a system, you should say N here. - config X86_VISWS bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 + depends on X86_32 && !PCI help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -304,12 +294,33 @@ config X86_VISWS and vice versa. See <file:Documentation/sgi-visws.txt> for details. config X86_GENERICARCH - bool "Generic architecture (Summit, bigsmp, ES7000, default)" + bool "Generic architecture" depends on X86_32 help - This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. - It is intended for a generic binary kernel. - If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + subarchitectures. It is intended for a generic binary kernel. + if you select them all, kernel will probe it one by one. and will + fallback to default. + +if X86_GENERICARCH + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + depends on SMP && X86_32 && PCI && X86_MPPARSE + select NUMA + help + This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) + NUMA multiquad box. This changes the way that processors are + bootstrapped, and uses Clustered Logical APIC addressing mode instead + of Flat Logical. You will need a new lynxer.elf file to flash your + firmware with - send email to <[email protected]>. + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_32 && SMP + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" @@ -317,8 +328,15 @@ config X86_ES7000 help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. - Only choose this option if you have such a system, otherwise you - should say N here. + +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32 && SMP + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + +endif config X86_RDC321X bool "RDC R-321x SoC" @@ -337,7 +355,7 @@ config X86_RDC321X config X86_VSMP bool "Support for ScaleMP vSMP" select PARAVIRT - depends on X86_64 + depends on X86_64 && !PCI help Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option @@ -417,51 +435,33 @@ config PARAVIRT_CLOCK endif -config MEMTEST_BOOTPARAM - bool "Memtest boot parameter" +config PARAVIRT_DEBUG + bool "paravirt-ops debugging" + depends on PARAVIRT && DEBUG_KERNEL + help + Enable to debug paravirt_ops internals. Specifically, BUG if + a paravirt_op is missing when it is called. + +config MEMTEST + bool "Memtest" depends on X86_64 default y help This option adds a kernel parameter 'memtest', which allows memtest - to be disabled at boot. If this option is selected, memtest - functionality can be disabled with memtest=0 on the kernel - command line. The purpose of this option is to allow a single - kernel image to be distributed with memtest built in, but not - necessarily enabled. - + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. If you are unsure how to answer this question, answer Y. -config MEMTEST_BOOTPARAM_VALUE - int "Memtest boot parameter default value (0-4)" - depends on MEMTEST_BOOTPARAM - range 0 4 - default 0 - help - This option sets the default value for the kernel parameter - 'memtest', which allows memtest to be disabled at boot. If this - option is set to 0 (zero), the memtest kernel parameter will - default to 0, disabling memtest at bootup. If this option is - set to 4, the memtest kernel parameter will default to 4, - enabling memtest at bootup, and use that as pattern number. - - If you are unsure how to answer this question, answer 0. - -config ACPI_SRAT - def_bool y - depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) - select ACPI_NUMA - -config HAVE_ARCH_PARSE_SRAT - def_bool y - depends on ACPI_SRAT - config X86_SUMMIT_NUMA def_bool y - depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) + depends on X86_32 && NUMA && X86_GENERICARCH config X86_CYCLONE_TIMER def_bool y - depends on X86_32 && X86_SUMMIT || X86_GENERICARCH + depends on X86_GENERICARCH config ES7000_CLUSTERED_APIC def_bool y @@ -549,6 +549,21 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 'iommu=calgary' on the kernel command line. If unsure, say Y. +config AMD_IOMMU + bool "AMD IOMMU support" + select SWIOTLB + depends on X86_64 && PCI && ACPI + help + With this option you can enable support for AMD IOMMU hardware in + your system. An IOMMU is a hardware component which provides + remapping of DMA memory accesses from devices. With an AMD IOMMU you + can isolate the the DMA memory of different devices and protect the + system from misbehaving device drivers or hardware. + + You can find out if your system has an AMD IOMMU if you look into + your BIOS for an option to enable it or if you have an IVRS ACPI + table. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool @@ -561,20 +576,35 @@ config SWIOTLB config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB) +config MAXSMP + bool "Configure Maximum number of SMP Processors and NUMA Nodes" + depends on X86_64 && SMP + default n + help + Configure maximum number of CPUS and NUMA Nodes for this architecture. + If unsure, say N. + +if MAXSMP +config NR_CPUS + int + default "4096" +endif +if !MAXSMP config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 + int "Maximum number of CPUs (2-4096)" + range 2 4096 depends on SMP default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 default "8" help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 255 and the + kernel will support. The maximum supported value is 4096 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +endif config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -911,9 +941,9 @@ config X86_PAE config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) default n if X86_PC - default y if (X86_NUMAQ || X86_SUMMIT) + default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help Enable NUMA (Non Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the @@ -965,13 +995,25 @@ config NUMA_EMU into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. +if MAXSMP + +config NODES_SHIFT + int + default "9" +endif + +if !MAXSMP config NODES_SHIFT - int "Max num nodes shift(1-15)" - range 1 15 if X86_64 + int "Maximum NUMA Nodes (as a power of 2)" + range 1 9 if X86_64 default "6" if X86_64 default "4" if X86_NUMAQ default "3" depends on NEED_MULTIPLE_NODES + help + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accomodate various tables. +endif config HAVE_ARCH_BOOTMEM_NODE def_bool y @@ -1090,6 +1132,40 @@ config MTRR See <file:Documentation/mtrr.txt> for more information. +config MTRR_SANITIZER + def_bool y + prompt "MTRR cleanup support" + depends on MTRR + help + Convert MTRR layout from continuous to discrete, so some X driver + could add WB entries. + + Say N here if you see bootup problems (boot crash, boot hang, + spontaneous reboots). + + Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size + could be used to send largest mtrr entry size for continuous block + to hold holes (aka. UC entries) + + If unsure, say Y. + +config MTRR_SANITIZER_ENABLE_DEFAULT + int "MTRR cleanup enable value (0-1)" + range 0 1 + default "0" + depends on MTRR_SANITIZER + help + Enable mtrr cleanup default value + +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT + int "MTRR cleanup spare reg num (0-7)" + range 0 7 + default "1" + depends on MTRR_SANITIZER + help + mtrr cleanup spare entries default, it can be changed via + mtrr_spare_reg_nr= + config X86_PAT bool prompt "x86 PAT support" @@ -1190,7 +1266,6 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps (EXPERIMENTAL)" - depends on EXPERIMENTAL depends on X86_64 || (X86_32 && HIGHMEM) help Generate crash dump after being started by kexec. @@ -1475,8 +1550,7 @@ endmenu menu "Bus options (PCI etc.)" config PCI - bool "PCI support" if !X86_VISWS && !X86_VSMP - depends on !X86_VOYAGER + bool "PCI support" default y select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) help diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2ad6301849a1..abff1b84ed5b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -344,7 +344,7 @@ config X86_F00F_BUG config X86_WP_WORKS_OK def_bool y - depends on X86_32 && !M386 + depends on !M386 config X86_INVLPG def_bool y @@ -399,6 +399,10 @@ config X86_TSC def_bool y depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 +config X86_CMPXCHG64 + def_bool y + depends on X86_PAE || X86_64 + # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 18363374d51a..acc0271920f2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -20,6 +20,14 @@ config NONPROMISC_DEVMEM If in doubt, say Y. +config X86_VERBOSE_BOOTUP + bool "Enable verbose x86 bootup info messages" + default y + help + Enables the informational output from the decompression stage + (e.g. bzImage) of the boot. If you disable this you will still + see errors. Disable this if you want silent bootup. + config EARLY_PRINTK bool "Early printk" if EMBEDDED default y @@ -60,7 +68,7 @@ config DEBUG_PAGEALLOC config DEBUG_PER_CPU_MAPS bool "Debug access to per_cpu maps" depends on DEBUG_KERNEL - depends on X86_64_SMP + depends on X86_SMP default n help Say Y to verify that the per_cpu map being accessed has @@ -129,15 +137,6 @@ config 4KSTACKS on the VM subsystem for higher order allocations. This option will also use IRQ stacks to compensate for the reduced stackspace. -config X86_FIND_SMP_CONFIG - def_bool y - depends on X86_LOCAL_APIC || X86_VOYAGER - depends on X86_32 - -config X86_MPPARSE - def_bool y - depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64 - config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3cff3c894cf3..b03d24b44bf9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/ -# NUMAQ subarch support -mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq -mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/ - -# BIGSMP subarch support -mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp -mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/ - -#Summit subarch support -mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit -mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/ - # generic subarchitecture mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ - -# ES7000 subarch support -mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000 -fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/ -mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/ - # RDC R-321x subarch support mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ @@ -160,6 +142,7 @@ KBUILD_AFLAGS += $(mflags-y) head-y := arch/x86/kernel/head_$(BITS).o head-y += arch/x86/kernel/head$(BITS).o +head-y += arch/x86/kernel/head.o head-y += arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ @@ -210,12 +193,12 @@ all: bzImage # KBUILD_IMAGE specify target image being built KBUILD_IMAGE := $(boot)/bzImage -zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage +zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage zImage bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot - $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ compressed: zImage diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index e01aafd03bde..4063d630deff 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -1,7 +1,7 @@ /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -95,6 +95,9 @@ static void enable_a20_kbc(void) outb(0xdf, 0x60); /* A20 on */ empty_8042(); + + outb(0xff, 0x64); /* Null command, but UHCI wants it */ + empty_8042(); } static void enable_a20_fast(void) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d8819efac81d..1d5dff4123e1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -30,6 +30,7 @@ #include <asm/page.h> #include <asm/boot.h> #include <asm/msr.h> +#include <asm/processor-flags.h> #include <asm/asm-offsets.h> .section ".text.head" @@ -109,7 +110,7 @@ startup_32: /* Enable PAE mode */ xorl %eax, %eax - orl $(1 << 5), %eax + orl $(X86_CR4_PAE), %eax movl %eax, %cr4 /* @@ -170,7 +171,7 @@ startup_32: pushl %eax /* Enter paged protected Mode, activating Long Mode */ - movl $0x80000001, %eax /* Enable Paging and Protected mode */ + movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */ movl %eax, %cr0 /* Jump from 32bit compatibility mode into 64bit mode. */ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 90456cee47c3..bc5553b496f7 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -30,6 +30,7 @@ #include <asm/io.h> #include <asm/page.h> #include <asm/boot.h> +#include <asm/bootparam.h> /* WARNING!! * This code is compiled with -fPIC and it is relocated dynamically @@ -187,13 +188,8 @@ static void gzip_release(void **); /* * This is set up by the setup-routine at boot-time */ -static unsigned char *real_mode; /* Pointer to real-mode data */ - -#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) -#ifndef STANDARD_MEMORY_BIOS_CALL -#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) -#endif -#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) +static struct boot_params *real_mode; /* Pointer to real-mode data */ +static int quiet; extern unsigned char input_data[]; extern int input_len; @@ -206,7 +202,8 @@ static void free(void *where); static void *memset(void *s, int c, unsigned n); static void *memcpy(void *dest, const void *src, unsigned n); -static void putstr(const char *); +static void __putstr(int, const char *); +#define putstr(__x) __putstr(0, __x) #ifdef CONFIG_X86_64 #define memptr long @@ -221,10 +218,6 @@ static char *vidmem; static int vidport; static int lines, cols; -#ifdef CONFIG_X86_NUMAQ -void *xquad_portio; -#endif - #include "../../../../lib/inflate.c" static void *malloc(int size) @@ -270,18 +263,24 @@ static void scroll(void) vidmem[i] = ' '; } -static void putstr(const char *s) +static void __putstr(int error, const char *s) { int x, y, pos; char c; +#ifndef CONFIG_X86_VERBOSE_BOOTUP + if (!error) + return; +#endif + #ifdef CONFIG_X86_32 - if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) + if (real_mode->screen_info.orig_video_mode == 0 && + lines == 0 && cols == 0) return; #endif - x = RM_SCREEN_INFO.orig_x; - y = RM_SCREEN_INFO.orig_y; + x = real_mode->screen_info.orig_x; + y = real_mode->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -302,8 +301,8 @@ static void putstr(const char *s) } } - RM_SCREEN_INFO.orig_x = x; - RM_SCREEN_INFO.orig_y = y; + real_mode->screen_info.orig_x = x; + real_mode->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -366,9 +365,9 @@ static void flush_window(void) static void error(char *x) { - putstr("\n\n"); - putstr(x); - putstr("\n\n -- System halted"); + __putstr(1, "\n\n"); + __putstr(1, x); + __putstr(1, "\n\n -- System halted"); while (1) asm("hlt"); @@ -395,7 +394,8 @@ static void parse_elf(void *output) return; } - putstr("Parsing ELF... "); + if (!quiet) + putstr("Parsing ELF... "); phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); if (!phdrs) @@ -430,7 +430,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, { real_mode = rmode; - if (RM_SCREEN_INFO.orig_video_mode == 7) { + if (real_mode->hdr.loadflags & QUIET_FLAG) + quiet = 1; + + if (real_mode->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -438,8 +441,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = RM_SCREEN_INFO.orig_video_lines; - cols = RM_SCREEN_INFO.orig_video_cols; + lines = real_mode->screen_info.orig_video_lines; + cols = real_mode->screen_info.orig_video_cols; window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ @@ -465,9 +468,11 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, #endif makecrc(); - putstr("\nDecompressing Linux... "); + if (!quiet) + putstr("\nDecompressing Linux... "); gunzip(); parse_elf(output); - putstr("done.\nBooting the kernel.\n"); + if (!quiet) + putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index edaadea90aaf..a1310c52fc0c 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -10,16 +10,20 @@ #define USE_BSD #include <endian.h> -#define MAX_SHDRS 100 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static Elf32_Ehdr ehdr; -static Elf32_Shdr shdr[MAX_SHDRS]; -static Elf32_Sym *symtab[MAX_SHDRS]; -static Elf32_Rel *reltab[MAX_SHDRS]; -static char *strtab[MAX_SHDRS]; static unsigned long reloc_count, reloc_idx; static unsigned long *relocs; +struct section { + Elf32_Shdr shdr; + struct section *link; + Elf32_Sym *symtab; + Elf32_Rel *reltab; + char *strtab; +}; +static struct section *secs; + /* * Following symbols have been audited. There values are constant and do * not change if bzImage is loaded at a different physical address than @@ -35,7 +39,7 @@ static int is_safe_abs_reloc(const char* sym_name) { int i; - for(i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { + for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { if (!strcmp(sym_name, safe_abs_relocs[i])) /* Match found */ return 1; @@ -137,10 +141,10 @@ static const char *sec_name(unsigned shndx) { const char *sec_strtab; const char *name; - sec_strtab = strtab[ehdr.e_shstrndx]; + sec_strtab = secs[ehdr.e_shstrndx].strtab; name = "<noname>"; if (shndx < ehdr.e_shnum) { - name = sec_strtab + shdr[shndx].sh_name; + name = sec_strtab + secs[shndx].shdr.sh_name; } else if (shndx == SHN_ABS) { name = "ABSOLUTE"; @@ -159,7 +163,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) name = sym_strtab + sym->st_name; } else { - name = sec_name(shdr[sym->st_shndx].sh_name); + name = sec_name(secs[sym->st_shndx].shdr.sh_name); } return name; } @@ -244,29 +248,34 @@ static void read_ehdr(FILE *fp) static void read_shdrs(FILE *fp) { int i; - if (ehdr.e_shnum > MAX_SHDRS) { - die("%d section headers supported: %d\n", - ehdr.e_shnum, MAX_SHDRS); + Elf32_Shdr shdr; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) { + die("Unable to allocate %d section headers\n", + ehdr.e_shnum); } if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); } - if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { - die("Cannot read ELF section headers: %s\n", - strerror(errno)); - } - for(i = 0; i < ehdr.e_shnum; i++) { - shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); - shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); - shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); - shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); - shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); - shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); - shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); - shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); - shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); - shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (fread(&shdr, sizeof shdr, 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name); + sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type); + sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags); + sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr); + sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset); + sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size); + sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link); + sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info); + sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign); + sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize); + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; } } @@ -274,20 +283,22 @@ static void read_shdrs(FILE *fp) static void read_strtabs(FILE *fp) { int i; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_STRTAB) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_STRTAB) { continue; } - strtab[i] = malloc(shdr[i].sh_size); - if (!strtab[i]) { + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) { die("malloc of %d bytes for strtab failed\n", - shdr[i].sh_size); + sec->shdr.sh_size); } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); + sec->shdr.sh_offset, strerror(errno)); } - if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { die("Cannot read symbol table: %s\n", strerror(errno)); } @@ -297,28 +308,31 @@ static void read_strtabs(FILE *fp) static void read_symtabs(FILE *fp) { int i,j; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_SYMTAB) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } - symtab[i] = malloc(shdr[i].sh_size); - if (!symtab[i]) { + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) { die("malloc of %d bytes for symtab failed\n", - shdr[i].sh_size); + sec->shdr.sh_size); } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); + sec->shdr.sh_offset, strerror(errno)); } - if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { die("Cannot read symbol table: %s\n", strerror(errno)); } - for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { - symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); - symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); - symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); - symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { + Elf32_Sym *sym = &sec->symtab[j]; + sym->st_name = elf32_to_cpu(sym->st_name); + sym->st_value = elf32_to_cpu(sym->st_value); + sym->st_size = elf32_to_cpu(sym->st_size); + sym->st_shndx = elf16_to_cpu(sym->st_shndx); } } } @@ -327,26 +341,29 @@ static void read_symtabs(FILE *fp) static void read_relocs(FILE *fp) { int i,j; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_REL) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_REL) { continue; } - reltab[i] = malloc(shdr[i].sh_size); - if (!reltab[i]) { + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) { die("malloc of %d bytes for relocs failed\n", - shdr[i].sh_size); + sec->shdr.sh_size); } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); + sec->shdr.sh_offset, strerror(errno)); } - if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { die("Cannot read symbol table: %s\n", strerror(errno)); } - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { - reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); - reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel = &sec->reltab[j]; + rel->r_offset = elf32_to_cpu(rel->r_offset); + rel->r_info = elf32_to_cpu(rel->r_info); } } } @@ -357,19 +374,21 @@ static void print_absolute_symbols(void) int i; printf("Absolute symbols\n"); printf(" Num: Value Size Type Bind Visibility Name\n"); - for(i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; char *sym_strtab; Elf32_Sym *sh_symtab; int j; - if (shdr[i].sh_type != SHT_SYMTAB) { + + if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } - sh_symtab = symtab[i]; - sym_strtab = strtab[shdr[i].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { + sh_symtab = sec->symtab; + sym_strtab = sec->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { Elf32_Sym *sym; const char *name; - sym = &symtab[i][j]; + sym = &sec->symtab[j]; name = sym_name(sym_strtab, sym); if (sym->st_shndx != SHN_ABS) { continue; @@ -389,26 +408,27 @@ static void print_absolute_relocs(void) { int i, printed = 0; - for(i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + struct section *sec_applies, *sec_symtab; char *sym_strtab; Elf32_Sym *sh_symtab; - unsigned sec_applies, sec_symtab; int j; - if (shdr[i].sh_type != SHT_REL) { + if (sec->shdr.sh_type != SHT_REL) { continue; } - sec_symtab = shdr[i].sh_link; - sec_applies = shdr[i].sh_info; - if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } - sh_symtab = symtab[sec_symtab]; - sym_strtab = strtab[shdr[sec_symtab].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel; Elf32_Sym *sym; const char *name; - rel = &reltab[i][j]; + rel = &sec->reltab[j]; sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; name = sym_name(sym_strtab, sym); if (sym->st_shndx != SHN_ABS) { @@ -456,26 +476,28 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) { int i; /* Walk through the relocations */ - for(i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < ehdr.e_shnum; i++) { char *sym_strtab; Elf32_Sym *sh_symtab; - unsigned sec_applies, sec_symtab; + struct section *sec_applies, *sec_symtab; int j; - if (shdr[i].sh_type != SHT_REL) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL) { continue; } - sec_symtab = shdr[i].sh_link; - sec_applies = shdr[i].sh_info; - if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } - sh_symtab = symtab[sec_symtab]; - sym_strtab = strtab[shdr[sec_symtab].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + sh_symtab = sec_symtab->symtab; + sym_strtab = sec->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel; Elf32_Sym *sym; unsigned r_type; - rel = &reltab[i][j]; + rel = &sec->reltab[j]; sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; r_type = ELF32_R_TYPE(rel->r_info); /* Don't visit relocations to absolute symbols */ @@ -539,7 +561,7 @@ static void emit_relocs(int as_text) */ printf(".section \".data.reloc\",\"a\"\n"); printf(".balign 4\n"); - for(i = 0; i < reloc_count; i++) { + for (i = 0; i < reloc_count; i++) { printf("\t .long 0x%08lx\n", relocs[i]); } printf("\n"); @@ -550,7 +572,7 @@ static void emit_relocs(int as_text) /* Print a stop */ printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); /* Now print each relocation */ - for(i = 0; i < reloc_count; i++) { + for (i = 0; i < reloc_count; i++) { buf[0] = (relocs[i] >> 0) & 0xff; buf[1] = (relocs[i] >> 8) & 0xff; buf[2] = (relocs[i] >> 16) & 0xff; @@ -577,7 +599,7 @@ int main(int argc, char **argv) show_absolute_relocs = 0; as_text = 0; fname = NULL; - for(i = 1; i < argc; i++) { + for (i = 1; i < argc; i++) { char *arg = argv[i]; if (*arg == '-') { if (strcmp(argv[1], "--abs-syms") == 0) { diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 00e19edd852c..92d6fd73dc7d 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -28,6 +28,8 @@ static char *cpu_name(int level) if (level == 64) { return "x86-64"; } else { + if (level == 15) + level = 6; sprintf(buf, "i%d86", level); return buf; } diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 77569a4a3be1..2296164b54d2 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -165,6 +165,10 @@ void main(void) /* Set the video mode */ set_video(); + /* Parse command line for 'quiet' and pass it to decompressor. */ + if (cmdline_find_option_bool("quiet")) + boot_params.hdr.loadflags |= QUIET_FLAG; + /* Do the last things and invoke protected mode */ go_to_protected_mode(); } diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index acad32eb4290..53165c97336b 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -13,6 +13,7 @@ */ #include "boot.h" +#include <linux/kernel.h> #define SMAP 0x534d4150 /* ASCII "SMAP" */ @@ -53,7 +54,7 @@ static int detect_memory_e820(void) count++; desc++; - } while (next && count < E820MAX); + } while (next && count < ARRAY_SIZE(boot_params.e820_map)); return boot_params.e820_entries = count; } diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index ab049d40a884..141b6e20ed31 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -33,6 +33,8 @@ protected_mode_jump: movw %cs, %bx shll $4, %ebx addl %ebx, 2f + jmp 1f # Short jump to serialize on 386/486 +1: movw $__BOOT_DS, %cx movw $__BOOT_TSS, %di @@ -40,8 +42,6 @@ protected_mode_jump: movl %cr0, %edx orb $X86_CR0_PE, %dl # Protected mode movl %edx, %cr0 - jmp 1f # Short jump to serialize on 386/486 -1: # Transition to 32-bit mode .byte 0x66, 0xea # ljmpl opcode diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 40ecb8d7688c..b939cb476dec 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -259,8 +259,7 @@ static int vga_probe(void) return mode_count[adapter]; } -__videocard video_vga = -{ +__videocard video_vga = { .card_name = "VGA", .probe = vga_probe, .set_mode = vga_set_mode, diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index ad7ddaaff588..9bc34e2033ec 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,54 +1,103 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-git14 -# Fri Jul 20 09:53:15 2007 +# Linux kernel version: 2.6.26-rc1 +# Sun May 4 19:59:02 2008 # +# CONFIG_64BIT is not set CONFIG_X86_32=y +# CONFIG_X86_64 is not set +CONFIG_X86=y +CONFIG_DEFCONFIG_LIST="arch/x86/configs/i386_defconfig" +# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_CLOCKSOURCE_WATCHDOG=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y -CONFIG_SEMAPHORE_SLEEPERS=y -CONFIG_X86=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_FAST_CMPXCHG_LOCAL=y CONFIG_MMU=y CONFIG_ZONE_DMA=y -CONFIG_QUICKLIST=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_HWEIGHT=y +# CONFIG_GENERIC_GPIO is not set CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_DMI=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +# CONFIG_GENERIC_TIME_VSYSCALL is not set +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_ZONE_DMA32 is not set +CONFIG_ARCH_POPULATES_NODE_MAP=y +# CONFIG_AUDIT_ARCH is not set +CONFIG_ARCH_SUPPORTS_AOUT=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_32_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y +# CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_AUDIT is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CPUSETS is not set -CONFIG_SYSFS_DEPRECATED=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_TREE=y +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +# CONFIG_USER_SCHED is not set +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -56,13 +105,15 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y @@ -76,6 +127,17 @@ CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_MARKERS=y +# CONFIG_OPROFILE is not set +CONFIG_HAVE_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -87,10 +149,10 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_KMOD is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -CONFIG_LBD=y -# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LBD is not set +CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_LSF is not set -# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_DEV_BSG=y # # IO Schedulers @@ -103,7 +165,8 @@ CONFIG_IOSCHED_CFQ=y # CONFIG_DEFAULT_DEADLINE is not set CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y # # Processor type and features @@ -111,18 +174,21 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y -# CONFIG_X86_PC is not set +CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_NUMAQ is not set # CONFIG_X86_SUMMIT is not set # CONFIG_X86_BIGSMP is not set # CONFIG_X86_VISWS is not set -CONFIG_X86_GENERICARCH=y +# CONFIG_X86_GENERICARCH is not set # CONFIG_X86_ES7000 is not set -# CONFIG_PARAVIRT is not set -CONFIG_X86_CYCLONE_TIMER=y +# CONFIG_X86_RDC321X is not set +# CONFIG_X86_VSMP is not set +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +# CONFIG_PARAVIRT_GUEST is not set # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -130,9 +196,8 @@ CONFIG_X86_CYCLONE_TIMER=y # CONFIG_M586MMX is not set # CONFIG_M686 is not set # CONFIG_MPENTIUMII is not set -CONFIG_MPENTIUMIII=y +# CONFIG_MPENTIUMIII is not set # CONFIG_MPENTIUMM is not set -# CONFIG_MCORE2 is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set # CONFIG_MK7 is not set @@ -147,14 +212,14 @@ CONFIG_MPENTIUMIII=y # CONFIG_MCYRIXIII is not set # CONFIG_MVIAC3_2 is not set # CONFIG_MVIAC7 is not set -CONFIG_X86_GENERIC=y +# CONFIG_MPSC is not set +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set +# CONFIG_X86_GENERIC is not set +CONFIG_X86_CPU=y CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_XADD=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_INVLPG=y CONFIG_X86_BSWAP=y @@ -162,106 +227,120 @@ CONFIG_X86_POPAD_OK=y CONFIG_X86_GOOD_APIC=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_P6_NOP=y CONFIG_X86_TSC=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=4 +CONFIG_X86_MINIMUM_CPU_FAMILY=6 +CONFIG_X86_DEBUGCTLMSR=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y -CONFIG_NR_CPUS=32 -CONFIG_SCHED_SMT=y +CONFIG_DMI=y +# CONFIG_IOMMU_HELPER is not set +CONFIG_NR_CPUS=4 +# CONFIG_SCHED_SMT is not set CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y -CONFIG_X86_MCE=y -CONFIG_X86_MCE_NONFATAL=y -CONFIG_X86_MCE_P4THERMAL=y +# CONFIG_X86_MCE is not set CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set # CONFIG_X86_REBOOTFIXUPS is not set -CONFIG_MICROCODE=y -CONFIG_MICROCODE_OLD_INTERFACE=y +# CONFIG_MICROCODE is not set CONFIG_X86_MSR=y CONFIG_X86_CPUID=y - -# -# Firmware Drivers -# -# CONFIG_EDD is not set -# CONFIG_DELL_RBU is not set -# CONFIG_DCDBAS is not set -CONFIG_DMIID=y # CONFIG_NOHIGHMEM is not set CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y -CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_NEED_NODE_MEMMAP_SIZE=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y +# CONFIG_FLATMEM_MANUAL is not set # CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_STATIC=y +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y -CONFIG_NR_QUICK=1 CONFIG_VIRT_TO_BUS=y # CONFIG_HIGHPTE is not set # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y -# CONFIG_EFI is not set +# CONFIG_X86_PAT is not set +CONFIG_EFI=y # CONFIG_IRQBALANCE is not set CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x100000 -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_ALIGN=0x100000 -# CONFIG_HOTPLUG_CPU is not set -CONFIG_COMPAT_VDSO=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x1000000 +CONFIG_RELOCATABLE=y +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +# CONFIG_COMPAT_VDSO is not set CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # -# Power management options (ACPI, APM) +# Power management options # CONFIG_PM=y -CONFIG_PM_LEGACY=y -# CONFIG_PM_DEBUG is not set - -# -# ACPI (Advanced Configuration and Power Interface) Support -# +CONFIG_PM_DEBUG=y +# CONFIG_PM_VERBOSE is not set +CONFIG_CAN_PM_TRACE=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HIBERNATION=y +CONFIG_PM_STD_PARTITION="" CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y -# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_DOCK=y +# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y +# CONFIG_ACPI_WMI is not set # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_TOSHIBA is not set -CONFIG_ACPI_BLACKLIST_YEAR=2001 -CONFIG_ACPI_DEBUG=y +# CONFIG_ACPI_CUSTOM_DSDT is not set +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y -# CONFIG_ACPI_CONTAINER is not set +CONFIG_ACPI_CONTAINER=y # CONFIG_ACPI_SBS is not set # CONFIG_APM is not set @@ -271,15 +350,17 @@ CONFIG_X86_PM_TIMER=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_TABLE=y CONFIG_CPU_FREQ_DEBUG=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_STAT is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # # CPUFreq processor drivers @@ -287,8 +368,7 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_X86_ACPI_CPUFREQ=y # CONFIG_X86_POWERNOW_K6 is not set # CONFIG_X86_POWERNOW_K7 is not set -CONFIG_X86_POWERNOW_K8=y -CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_POWERNOW_K8 is not set # CONFIG_X86_GX_SUSPMOD is not set # CONFIG_X86_SPEEDSTEP_CENTRINO is not set # CONFIG_X86_SPEEDSTEP_ICH is not set @@ -302,43 +382,72 @@ CONFIG_X86_POWERNOW_K8_ACPI=y # # shared options # -CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y # -# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# Bus options (PCI etc.) # CONFIG_PCI=y # CONFIG_PCI_GOBIOS is not set # CONFIG_PCI_GOMMCONFIG is not set # CONFIG_PCI_GODIRECT is not set CONFIG_PCI_GOANY=y +# CONFIG_PCI_GOOLPC is not set CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y -# CONFIG_PCIEPORTBUS is not set +CONFIG_PCI_DOMAINS=y +CONFIG_PCIEPORTBUS=y +# CONFIG_HOTPLUG_PCI_PCIE is not set +CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y +# CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set -# CONFIG_HT_IRQ is not set +CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set # CONFIG_MCA is not set # CONFIG_SCx200 is not set +# CONFIG_OLPC is not set CONFIG_K8_NB=y - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set -# CONFIG_HOTPLUG_PCI is not set - -# -# Executable file formats +CONFIG_PCCARD=y +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=y +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set +CONFIG_PCCARD_NONSTATIC=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_IBM is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# Executable file formats / Emulations # CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_AOUT is not set -# CONFIG_BINFMT_MISC is not set +CONFIG_BINFMT_MISC=y # # Networking @@ -349,59 +458,142 @@ CONFIG_NET=y # Networking options # CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set +CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set +CONFIG_XFRM_USER=y # CONFIG_XFRM_SUB_POLICY is not set # CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set # CONFIG_NET_KEY is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y # CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set +CONFIG_SYN_COOKIES=y # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set CONFIG_INET_TUNNEL=y -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_INET_LRO=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_IPV6_OPTIMISTIC_DAD is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set CONFIG_INET6_XFRM_MODE_TRANSPORT=y CONFIG_INET6_XFRM_MODE_TUNNEL=y -# CONFIG_INET6_XFRM_MODE_BEET is not set +CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_NETLABEL=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_ADVANCED is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +CONFIG_NF_NAT_SIP=y +CONFIG_IP_NF_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_TIPC is not set @@ -409,6 +601,7 @@ CONFIG_IPV6_SIT=y # CONFIG_BRIDGE is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set +CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set @@ -416,28 +609,99 @@ CONFIG_IPV6_SIT=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_RR is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +# CONFIG_NET_EMATCH_U32 is not set +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +# CONFIG_NET_ACT_POLICE is not set +# CONFIG_NET_ACT_GACT is not set +# CONFIG_NET_ACT_MIRRED is not set +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_SCH_FIFO=y # # Network testing # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set -# CONFIG_HAMRADIO is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +# CONFIG_AX25 is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y # # Wireless # -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set +CONFIG_CFG80211=y +CONFIG_NL80211=y +CONFIG_WIRELESS_EXT=y +CONFIG_MAC80211=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set +# CONFIG_MAC80211_DEBUG is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -449,13 +713,15 @@ CONFIG_IPV6_SIT=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set +CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y @@ -466,7 +732,7 @@ CONFIG_PNP=y # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -479,8 +745,8 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_MISC_DEVICES=y @@ -489,73 +755,17 @@ CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_TC1100_WMI is not set +# CONFIG_MSI_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_IDE_SATA is not set -# CONFIG_BLK_DEV_HD_IDE is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set -# CONFIG_BLK_DEV_IDESCSI is not set -CONFIG_BLK_DEV_IDEACPI=y -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_PROC_FS=y - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_IDEPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -# CONFIG_IDEPCI_SHARE_IRQ is not set -CONFIG_IDEPCI_PCIBUS_ORDER=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_GENERIC is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -# CONFIG_IDEDMA_ONLYDISK is not set -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_BLK_DEV_ATIIXP is not set -# CONFIG_BLK_DEV_CMD64X is not set -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_CS5535 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_IT8213 is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -# CONFIG_BLK_DEV_PDC202XX_NEW is not set -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_BLK_DEV_TC86C001 is not set -# CONFIG_IDE_ARM is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_BLK_DEV_HD is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set # # SCSI device support @@ -564,8 +774,8 @@ CONFIG_BLK_DEV_IDEDMA=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set -CONFIG_SCSI_NETLINK=y -# CONFIG_SCSI_PROC_FS is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) @@ -574,7 +784,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set @@ -582,7 +792,7 @@ CONFIG_CHR_DEV_SG=y # Some SCSI devices (e.g. CD jukebox) support multiple LUNs # # CONFIG_SCSI_MULTI_LUN is not set -# CONFIG_SCSI_CONSTANTS is not set +CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set CONFIG_SCSI_WAIT_SCAN=m @@ -591,81 +801,37 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# -# CONFIG_ISCSI_TCP is not set -CONFIG_BLK_DEV_3W_XXXX_RAID=y -# CONFIG_SCSI_3W_9XXX is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AACRAID is not set -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -# CONFIG_SCSI_AIC7XXX_OLD is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=4000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_ARCMSR is not set -# CONFIG_MEGARAID_NEWGEN is not set -# CONFIG_MEGARAID_LEGACY is not set -# CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_HPTIOP is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_STEX is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_QLA_ISCSI is not set -# CONFIG_SCSI_LPFC is not set -# CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_NSP32 is not set -# CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=y -CONFIG_SATA_SVW=y +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set CONFIG_ATA_PIIX=y # CONFIG_SATA_MV is not set -CONFIG_SATA_NV=y +# CONFIG_SATA_NV is not set # CONFIG_PDC_ADMA is not set # CONFIG_SATA_QSTOR is not set # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_SX4 is not set -CONFIG_SATA_SIL=y -# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set -CONFIG_SATA_VIA=y +# CONFIG_SATA_VIA is not set # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ACPI is not set # CONFIG_PATA_ALI is not set -# CONFIG_PATA_AMD is not set +CONFIG_PATA_AMD=y # CONFIG_PATA_ARTOP is not set # CONFIG_PATA_ATIIXP is not set # CONFIG_PATA_CMD640_PCI is not set @@ -673,6 +839,7 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_CS5520 is not set # CONFIG_PATA_CS5530 is not set # CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CS5536 is not set # CONFIG_PATA_CYPRESS is not set # CONFIG_PATA_EFAR is not set # CONFIG_ATA_GENERIC is not set @@ -686,11 +853,14 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set -# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PCMCIA is not set # CONFIG_PATA_PDC_OLD is not set # CONFIG_PATA_RADISYS is not set # CONFIG_PATA_RZ1000 is not set @@ -702,65 +872,42 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set CONFIG_MD=y -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set CONFIG_BLK_DEV_DM=y # CONFIG_DM_DEBUG is not set # CONFIG_DM_CRYPT is not set # CONFIG_DM_SNAPSHOT is not set -# CONFIG_DM_MIRROR is not set -# CONFIG_DM_ZERO is not set +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y # CONFIG_DM_MULTIPATH is not set # CONFIG_DM_DELAY is not set - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set -CONFIG_FUSION_MAX_SGE=128 -# CONFIG_FUSION_CTL is not set +# CONFIG_DM_UEVENT is not set +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support # # CONFIG_FIREWIRE is not set -CONFIG_IEEE1394=y - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Controllers -# - -# -# Texas Instruments PCILynx requires I2C -# -CONFIG_IEEE1394_OHCI1394=y - -# -# Protocols -# -# CONFIG_IEEE1394_VIDEO1394 is not set -# CONFIG_IEEE1394_SBP2 is not set -# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set -# CONFIG_IEEE1394_ETH1394 is not set -# CONFIG_IEEE1394_DV1394 is not set -CONFIG_IEEE1394_RAWIO=y +# CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y -# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +# CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set # CONFIG_PHYLIB is not set @@ -770,38 +917,40 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y -CONFIG_VORTEX=y +# CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set -CONFIG_TULIP=y -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -# CONFIG_TULIP_NAPI is not set +# CONFIG_TULIP is not set # CONFIG_DE4X5 is not set # CONFIG_WINBOND_840 is not set # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -CONFIG_B44=y +# CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -CONFIG_8139CP=y +# CONFIG_8139CP is not set CONFIG_8139TOO=y -# CONFIG_8139TOO_PIO is not set +CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set @@ -814,34 +963,75 @@ CONFIG_NETDEV_1000=y CONFIG_E1000=y # CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_E1000E is not set +# CONFIG_E1000E_ENABLED is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set -CONFIG_R8169=y -# CONFIG_R8169_NAPI is not set +# CONFIG_R8169 is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set CONFIG_SKY2=y +# CONFIG_SKY2_DEBUG is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -CONFIG_BNX2=y +# CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set +# CONFIG_NIU is not set # CONFIG_MLX4_CORE is not set -# CONFIG_TR is not set +# CONFIG_TEHUTI is not set +# CONFIG_BNX2X is not set +# CONFIG_SFC is not set +CONFIG_TR=y +# CONFIG_IBMOL is not set +# CONFIG_IBMLS is not set +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set # # Wireless LAN # # CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_PCMCIA_RAYCS is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_AIRO is not set +# CONFIG_HERMES is not set +# CONFIG_ATMEL is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_WL3501 is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_RTL8180 is not set +# CONFIG_RTL8187 is not set +# CONFIG_ADM8211 is not set +# CONFIG_P54_COMMON is not set +CONFIG_ATH5K=y +# CONFIG_ATH5K_DEBUG is not set +# CONFIG_IWLWIFI is not set +# CONFIG_IWLCORE is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWL4965 is not set +# CONFIG_IWL3945 is not set +# CONFIG_HOSTAP is not set +# CONFIG_B43 is not set +# CONFIG_B43LEGACY is not set +# CONFIG_ZD1211RW is not set +# CONFIG_RT2X00 is not set # # USB Network Adapters @@ -850,16 +1040,27 @@ CONFIG_NETDEV_10000=y # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set +CONFIG_NET_PCMCIA=y +# CONFIG_PCMCIA_3C589 is not set +# CONFIG_PCMCIA_3C574 is not set +# CONFIG_PCMCIA_FMVJ18X is not set +# CONFIG_PCMCIA_PCNET is not set +# CONFIG_PCMCIA_NMCLAN is not set +# CONFIG_PCMCIA_SMC91C92 is not set +# CONFIG_PCMCIA_XIRC2PS is not set +# CONFIG_PCMCIA_AXNET is not set +# CONFIG_PCMCIA_IBMTR is not set # CONFIG_WAN is not set -# CONFIG_FDDI is not set +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +# CONFIG_SKFP is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y +# CONFIG_NETCONSOLE_DYNAMIC is not set CONFIG_NETPOLL=y # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y @@ -870,18 +1071,17 @@ CONFIG_NET_POLL_CONTROLLER=y # Input device support # CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set +CONFIG_INPUT_FF_MEMLESS=y +CONFIG_INPUT_POLLDEV=y # # Userland interfaces # CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set @@ -906,17 +1106,63 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +# CONFIG_JOYSTICK_XPAD is not set +CONFIG_INPUT_TABLET=y +# CONFIG_TABLET_USB_ACECAD is not set +# CONFIG_TABLET_USB_AIPTEK is not set +# CONFIG_TABLET_USB_GTCO is not set +# CONFIG_TABLET_USB_KBTAB is not set +# CONFIG_TABLET_USB_WACOM is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_UCB1400 is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +# CONFIG_INPUT_APANEL is not set +# CONFIG_INPUT_WISTRON_BTNS is not set +# CONFIG_INPUT_ATLAS_BTNS is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_UINPUT is not set # # Hardware I/O ports # CONFIG_SERIO=y CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_CT82C710 is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y @@ -929,8 +1175,26 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_COMPUTONE is not set +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set +# CONFIG_MOXA_INTELLIO is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_SYNCLINK_GT is not set +# CONFIG_N_HDLC is not set +# CONFIG_RISCOM8 is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set +# CONFIG_RIO is not set +# CONFIG_STALDRV is not set +# CONFIG_NOZOMI is not set # # Serial drivers @@ -940,9 +1204,14 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_PNP=y -CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_CS is not set +CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # # Non-8250 serial port support @@ -951,89 +1220,275 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_INTEL=y -CONFIG_HW_RANDOM_AMD=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set CONFIG_HW_RANDOM_GEODE=y CONFIG_HW_RANDOM_VIA=y -# CONFIG_NVRAM is not set -CONFIG_RTC=y +CONFIG_NVRAM=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_SONYPI is not set -CONFIG_AGP=y -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_ATI is not set -# CONFIG_AGP_AMD is not set -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -# CONFIG_AGP_NVIDIA is not set -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_SWORKS is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_EFFICEON is not set -# CONFIG_DRM is not set + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set # CONFIG_MWAVE is not set # CONFIG_PC8736x_GPIO is not set # CONFIG_NSC_GPIO is not set # CONFIG_CS5535_GPIO is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=256 +# CONFIG_RAW_DRIVER is not set CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set -CONFIG_HPET_MMAP=y +# CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_DEVPORT=y -# CONFIG_I2C is not set - -# -# SPI support -# +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +CONFIG_I2C_I801=y +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_TINY_USB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set # CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set # CONFIG_HWMON is not set +CONFIG_THERMAL=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_IT8712F_WDT is not set +# CONFIG_HP_WATCHDOG is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_SBC7240_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set # # Multifunction device drivers # # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set + +# +# Multimedia drivers +# CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_AGP=y +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_ATI is not set +# CONFIG_AGP_AMD is not set +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_NVIDIA is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_EFFICEON is not set +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_RADEON is not set +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_I915=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +CONFIG_FB_DEFERRED_IO=y +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ARC is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_UVESA is not set +# CONFIG_FB_VESA is not set +CONFIG_FB_EFI=y +# CONFIG_FB_IMAC is not set +# CONFIG_FB_N411 is not set +# CONFIG_FB_HGA is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_I810 is not set +# CONFIG_FB_LE80578 is not set +# CONFIG_FB_INTEL is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_CYBLA is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_GEODE is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_CORGI is not set +# CONFIG_BACKLIGHT_PROGEAR is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_FB is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_LOGO_LINUX_CLUT224=y # # Sound @@ -1043,33 +1498,167 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -# CONFIG_SND is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_PCM_OSS_PLUGINS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y + +# +# Generic devices +# +# CONFIG_SND_PCSP is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5530 is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +# CONFIG_SND_HDA_POWER_SAVE is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_HIFIER is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SIS7019 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND_USB_CAIAQ is not set + +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# System on Chip audio support +# +# CONFIG_SND_SOC is not set + +# +# ALSA SoC audio for Freescale SOCs +# + +# +# SoC Audio for the Texas Instruments OMAP +# # # Open Sound System # -CONFIG_SOUND_PRIME=y -# CONFIG_SOUND_TRIDENT is not set -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -# CONFIG_SOUND_OSS is not set +# CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y -# CONFIG_HID_DEBUG is not set +CONFIG_HID_DEBUG=y +CONFIG_HIDRAW=y # # USB Input Devices # CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set +CONFIG_USB_HIDINPUT_POWERBOOK=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_PANTHERLORD_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_ZEROPLUS_FF=y +CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y -# CONFIG_USB_DEBUG is not set +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y # # Miscellaneous USB options @@ -1077,18 +1666,18 @@ CONFIG_USB=y CONFIG_USB_DEVICEFS=y # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set -# CONFIG_USB_SUSPEND is not set -# CONFIG_USB_PERSIST is not set +CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set @@ -1121,8 +1710,10 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_LIBUSUAL is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +CONFIG_USB_LIBUSUAL=y # # USB Imaging devices @@ -1134,10 +1725,6 @@ CONFIG_USB_MON=y # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -1163,90 +1750,125 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y # -# USB DSL modem support +# LED drivers # +# CONFIG_LEDS_CLEVO_MAIL is not set # -# USB Gadget Support +# LED Triggers # -# CONFIG_USB_GADGET is not set -# CONFIG_MMC is not set +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_EDAC=y # -# LED devices +# Reporting subsystems # -# CONFIG_NEW_LEDS is not set +# CONFIG_EDAC_DEBUG is not set +# CONFIG_EDAC_MM_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_DEBUG is not set # -# LED drivers +# RTC interfaces # +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set # -# LED Triggers +# I2C RTC drivers # -# CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set # -# Real Time Clock +# SPI RTC drivers # -# CONFIG_RTC_CLASS is not set # -# DMA Engine support +# Platform RTC drivers # -# CONFIG_DMA_ENGINE is not set +CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set # -# DMA Clients +# on-CPU RTC drivers # +CONFIG_DMADEVICES=y # # DMA Devices # -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set +# CONFIG_INTEL_IOATDMA is not set +# CONFIG_UIO is not set # -# Userspace I/O +# Firmware Drivers # -# CONFIG_UIO is not set +# CONFIG_EDD is not set +CONFIG_EFI_VARS=y +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems # -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -# CONFIG_EXT2_FS_SECURITY is not set -# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y -# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_EXT3_FS_SECURITY=y # CONFIG_EXT4DEV_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set @@ -1256,8 +1878,8 @@ CONFIG_GENERIC_ACL=y # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set +CONFIG_JOLIET=y +CONFIG_ZISOFS=y # CONFIG_UDF_FS is not set # @@ -1275,13 +1897,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set # @@ -1289,6 +1911,7 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -1296,33 +1919,15 @@ CONFIG_RAMFS=y # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -1332,14 +1937,26 @@ CONFIG_SUNRPC=y # # Partition Types # -# CONFIG_PARTITION_ADVANCED is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set @@ -1374,37 +1991,33 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set -CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y - -# -# Distributed Lock Manager -# # CONFIG_DLM is not set -CONFIG_INSTRUMENTATION=y -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y # # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y -# CONFIG_DEBUG_FS is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -# CONFIG_SCHEDSTATS is not set +CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set # CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1419,48 +2032,174 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_OPTIMIZE_INLINING=y +# CONFIG_DEBUG_SG is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_SAMPLES is not set +# CONFIG_KGDB is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_NONPROMISC_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_RODATA is not set +CONFIG_DEBUG_STACK_USAGE=y +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_X86_PTDUMP is not set +CONFIG_DEBUG_RODATA=y +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m # CONFIG_4KSTACKS is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y CONFIG_DOUBLEFAULT=y +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +CONFIG_DEBUG_BOOT_PARAMS=y +# CONFIG_CPA_DEBUG is not set # # Security options # -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_CRYPTO is not set +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +CONFIG_SECURITY_CAPABILITIES=y +CONFIG_SECURITY_FILE_CAPABILITIES=y +# CONFIG_SECURITY_ROOTPLUG is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_SMACK is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_SHA1=y +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_586 is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_586 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_GEODE is not set +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set +# CONFIG_LGUEST is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_X86_SMP=y -CONFIG_X86_HT=y -CONFIG_X86_BIOS_REBOOT=y -CONFIG_X86_TRAMPOLINE=y -CONFIG_KTIME_SCALAR=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2d6f5b2809d2..ae5124e064d4 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,64 +1,103 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-git14 -# Fri Jul 20 09:53:15 2007 +# Linux kernel version: 2.6.26-rc1 +# Sun May 4 19:59:57 2008 # -CONFIG_X86_64=y CONFIG_64BIT=y +# CONFIG_X86_32 is not set +CONFIG_X86_64=y CONFIG_X86=y +CONFIG_DEFCONFIG_LIST="arch/x86/configs/x86_64_defconfig" +# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_ZONE_DMA32=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y -CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_FAST_CMPXCHG_LOCAL=y CONFIG_MMU=y CONFIG_ZONE_DMA=y -CONFIG_QUICKLIST=y -CONFIG_NR_QUICK=2 -CONFIG_RWSEM_GENERIC_SPINLOCK=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_X86_CMPXCHG=y -CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_ARCH_POPULATES_NODE_MAP=y -CONFIG_DMI=y -CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +# CONFIG_GENERIC_GPIO is not set +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ZONE_DMA32=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_AUDIT_ARCH=y +CONFIG_ARCH_SUPPORTS_AOUT=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_64_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +# CONFIG_KTIME_SCALAR is not set # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y +# CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_AUDIT is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CPUSETS is not set -CONFIG_SYSFS_DEPRECATED=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_TREE=y +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +# CONFIG_USER_SCHED is not set +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -66,13 +105,15 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y @@ -82,9 +123,21 @@ CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLAB=y -# CONFIG_SLUB is not set +CONFIG_SLUB_DEBUG=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y # CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_MARKERS=y +# CONFIG_OPROFILE is not set +CONFIG_HAVE_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -96,14 +149,15 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_KMOD is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_BSG=y +CONFIG_BLOCK_COMPAT=y # # IO Schedulers # CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set +CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y # CONFIG_DEFAULT_AS is not set @@ -111,107 +165,177 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y # # Processor type and features # +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_ES7000 is not set +# CONFIG_X86_RDC321X is not set # CONFIG_X86_VSMP is not set +# CONFIG_PARAVIRT_GUEST is not set +CONFIG_MEMTEST_BOOTPARAM=y +CONFIG_MEMTEST_BOOTPARAM_VALUE=0 +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set # CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set # CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_L1_CACHE_BYTES=128 -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_INTERNODE_CACHE_BYTES=128 -CONFIG_X86_TSC=y +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set +CONFIG_X86_CPU=y +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_INTERNODE_CACHE_BYTES=64 +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_GOOD_APIC=y -# CONFIG_MICROCODE is not set -CONFIG_X86_MSR=y -CONFIG_X86_CPUID=y -CONFIG_X86_HT=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_MTRR=y -CONFIG_SMP=y -CONFIG_SCHED_SMT=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_P6_NOP=y +CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_FAMILY=64 +CONFIG_X86_DEBUGCTLMSR=y +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_DMI=y +CONFIG_GART_IOMMU=y +CONFIG_CALGARY_IOMMU=y +CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y +CONFIG_SWIOTLB=y +CONFIG_IOMMU_HELPER=y +CONFIG_NR_CPUS=4 +# CONFIG_SCHED_SMT is not set CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +# CONFIG_X86_MCE is not set +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y CONFIG_NUMA=y CONFIG_K8_NUMA=y -CONFIG_NODES_SHIFT=6 CONFIG_X86_64_ACPI_NUMA=y -CONFIG_NUMA_EMU=y +CONFIG_NODES_SPAN_OTHER_NODES=y +# CONFIG_NUMA_EMU is not set +CONFIG_NODES_SHIFT=6 +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_SPARSEMEM_VMEMMAP=y + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y -CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y -CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y -CONFIG_NR_CPUS=32 -CONFIG_PHYSICAL_ALIGN=0x200000 -CONFIG_HOTPLUG_CPU=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_GART_IOMMU=y -# CONFIG_CALGARY_IOMMU is not set -CONFIG_SWIOTLB=y -CONFIG_X86_MCE=y -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_START=0x200000 +CONFIG_MTRR=y +# CONFIG_X86_PAT is not set +CONFIG_EFI=y CONFIG_SECCOMP=y -# CONFIG_CC_STACKPROTECTOR is not set # CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_K8_NB=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_ISA_DMA_API=y -CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x1000000 +CONFIG_RELOCATABLE=y +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +# CONFIG_COMPAT_VDSO is not set +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y # # Power management options # +CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_PM=y -# CONFIG_PM_LEGACY is not set -# CONFIG_PM_DEBUG is not set +CONFIG_PM_DEBUG=y +# CONFIG_PM_VERBOSE is not set +CONFIG_CAN_PM_TRACE=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" - -# -# ACPI (Advanced Configuration and Power Interface) Support -# CONFIG_ACPI=y CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SLEEP_PROC_FS=y -CONFIG_ACPI_SLEEP_PROC_SLEEP=y CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y -# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_DOCK=y +# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y CONFIG_ACPI_NUMA=y +# CONFIG_ACPI_WMI is not set # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_TOSHIBA is not set +# CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y @@ -227,29 +351,34 @@ CONFIG_ACPI_CONTAINER=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_TABLE=y CONFIG_CPU_FREQ_DEBUG=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_STAT is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # # CPUFreq processor drivers # -CONFIG_X86_POWERNOW_K8=y -CONFIG_X86_POWERNOW_K8_ACPI=y -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_POWERNOW_K8 is not set +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +# CONFIG_X86_P4_CLOCKMOD is not set # # shared options # -CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y # # Bus options (PCI etc.) @@ -257,27 +386,56 @@ CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_DOMAINS=y +CONFIG_DMAR=y +CONFIG_DMAR_GFX_WA=y +CONFIG_DMAR_FLOPPY_WA=y CONFIG_PCIEPORTBUS=y +# CONFIG_HOTPLUG_PCI_PCIE is not set CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y +# CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set -# CONFIG_HT_IRQ is not set - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set -# CONFIG_HOTPLUG_PCI is not set +CONFIG_HT_IRQ=y +CONFIG_ISA_DMA_API=y +CONFIG_K8_NB=y +CONFIG_PCCARD=y +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=y +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set +CONFIG_PCCARD_NONSTATIC=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set # # Executable file formats / Emulations # CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set +CONFIG_COMPAT_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y CONFIG_IA32_EMULATION=y -CONFIG_IA32_AOUT=y +# CONFIG_IA32_AOUT is not set CONFIG_COMPAT=y +CONFIG_COMPAT_FOR_U64_ALIGNMENT=y CONFIG_SYSVIPC_COMPAT=y # @@ -289,22 +447,31 @@ CONFIG_NET=y # Networking options # CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set +CONFIG_PACKET_MMAP=y CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=y +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set # CONFIG_NET_KEY is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y # CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set +CONFIG_SYN_COOKIES=y # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set @@ -313,31 +480,109 @@ CONFIG_INET_TUNNEL=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_INET_LRO=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_IPV6_OPTIMISTIC_DAD is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set -# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET6_XFRM_MODE_TUNNEL is not set -# CONFIG_INET6_XFRM_MODE_BEET is not set +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_NETLABEL=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_ADVANCED is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +CONFIG_NF_NAT_SIP=y +CONFIG_IP_NF_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_TIPC is not set @@ -345,6 +590,7 @@ CONFIG_IPV6_SIT=y # CONFIG_BRIDGE is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set +CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set @@ -352,28 +598,99 @@ CONFIG_IPV6_SIT=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_RR is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +# CONFIG_NET_EMATCH_U32 is not set +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +# CONFIG_NET_ACT_POLICE is not set +# CONFIG_NET_ACT_GACT is not set +# CONFIG_NET_ACT_MIRRED is not set +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_SCH_FIFO=y # # Network testing # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set -# CONFIG_HAMRADIO is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +# CONFIG_AX25 is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y # # Wireless # -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set +CONFIG_CFG80211=y +CONFIG_NL80211=y +CONFIG_WIRELESS_EXT=y +CONFIG_MAC80211=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set +# CONFIG_MAC80211_DEBUG is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -385,13 +702,15 @@ CONFIG_IPV6_SIT=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set +CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y @@ -402,7 +721,7 @@ CONFIG_PNP=y # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -415,8 +734,8 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_MISC_DEVICES=y @@ -425,72 +744,16 @@ CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_MSI_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_IDE_SATA is not set -# CONFIG_BLK_DEV_HD_IDE is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set -# CONFIG_BLK_DEV_IDESCSI is not set -CONFIG_BLK_DEV_IDEACPI=y -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_PROC_FS=y - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_IDEPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -# CONFIG_IDEPCI_SHARE_IRQ is not set -CONFIG_IDEPCI_PCIBUS_ORDER=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_GENERIC is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -# CONFIG_IDEDMA_ONLYDISK is not set -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_ATIIXP=y -# CONFIG_BLK_DEV_CMD64X is not set -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_IT8213 is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_BLK_DEV_TC86C001 is not set -# CONFIG_IDE_ARM is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_BLK_DEV_HD is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set # # SCSI device support @@ -499,8 +762,8 @@ CONFIG_BLK_DEV_IDEDMA=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set -CONFIG_SCSI_NETLINK=y -# CONFIG_SCSI_PROC_FS is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) @@ -509,7 +772,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set @@ -526,73 +789,37 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set -CONFIG_SCSI_SAS_ATTRS=y +# CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# -# CONFIG_ISCSI_TCP is not set -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_3W_9XXX is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set -# CONFIG_SCSI_AIC7XXX_OLD is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=4000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_ARCMSR is not set -# CONFIG_MEGARAID_NEWGEN is not set -# CONFIG_MEGARAID_LEGACY is not set -# CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_HPTIOP is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_STEX is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_QLA_ISCSI is not set -# CONFIG_SCSI_LPFC is not set -# CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=y -CONFIG_SATA_SVW=y +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set CONFIG_ATA_PIIX=y # CONFIG_SATA_MV is not set -CONFIG_SATA_NV=y +# CONFIG_SATA_NV is not set # CONFIG_PDC_ADMA is not set # CONFIG_SATA_QSTOR is not set # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_SX4 is not set -CONFIG_SATA_SIL=y -# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set -CONFIG_SATA_VIA=y +# CONFIG_SATA_VIA is not set # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ACPI is not set # CONFIG_PATA_ALI is not set -# CONFIG_PATA_AMD is not set +CONFIG_PATA_AMD=y # CONFIG_PATA_ARTOP is not set # CONFIG_PATA_ATIIXP is not set # CONFIG_PATA_CMD640_PCI is not set @@ -612,11 +839,14 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set -# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PCMCIA is not set # CONFIG_PATA_PDC_OLD is not set # CONFIG_PATA_RADISYS is not set # CONFIG_PATA_RZ1000 is not set @@ -628,65 +858,42 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set CONFIG_MD=y -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set CONFIG_BLK_DEV_DM=y # CONFIG_DM_DEBUG is not set # CONFIG_DM_CRYPT is not set # CONFIG_DM_SNAPSHOT is not set -# CONFIG_DM_MIRROR is not set -# CONFIG_DM_ZERO is not set +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y # CONFIG_DM_MULTIPATH is not set # CONFIG_DM_DELAY is not set - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set -CONFIG_FUSION_MAX_SGE=128 -# CONFIG_FUSION_CTL is not set +# CONFIG_DM_UEVENT is not set +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support # # CONFIG_FIREWIRE is not set -CONFIG_IEEE1394=y - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Controllers -# - -# -# Texas Instruments PCILynx requires I2C -# -CONFIG_IEEE1394_OHCI1394=y - -# -# Protocols -# -# CONFIG_IEEE1394_VIDEO1394 is not set -# CONFIG_IEEE1394_SBP2 is not set -# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set -# CONFIG_IEEE1394_ETH1394 is not set -# CONFIG_IEEE1394_DV1394 is not set -CONFIG_IEEE1394_RAWIO=y +# CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y -# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set -CONFIG_TUN=y +# CONFIG_TUN is not set +# CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set # CONFIG_PHYLIB is not set @@ -696,39 +903,40 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y -CONFIG_VORTEX=y +# CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set -CONFIG_TULIP=y -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -# CONFIG_TULIP_NAPI is not set +# CONFIG_TULIP is not set # CONFIG_DE4X5 is not set # CONFIG_WINBOND_840 is not set # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set -CONFIG_AMD8111_ETH=y -# CONFIG_AMD8111E_NAPI is not set +# CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -CONFIG_B44=y +# CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -CONFIG_8139CP=y +# CONFIG_8139CP is not set CONFIG_8139TOO=y -# CONFIG_8139TOO_PIO is not set +CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set @@ -740,34 +948,74 @@ CONFIG_NETDEV_1000=y CONFIG_E1000=y # CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_E1000E is not set +# CONFIG_E1000E_ENABLED is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set -# CONFIG_SKY2 is not set +CONFIG_SKY2=y +# CONFIG_SKY2_DEBUG is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -CONFIG_BNX2=y +# CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGBE is not set # CONFIG_IXGB is not set -CONFIG_S2IO=m -# CONFIG_S2IO_NAPI is not set +# CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set +# CONFIG_NIU is not set # CONFIG_MLX4_CORE is not set -# CONFIG_TR is not set +# CONFIG_TEHUTI is not set +# CONFIG_BNX2X is not set +# CONFIG_SFC is not set +CONFIG_TR=y +# CONFIG_IBMOL is not set +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set # # Wireless LAN # # CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_PCMCIA_RAYCS is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_AIRO is not set +# CONFIG_HERMES is not set +# CONFIG_ATMEL is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_WL3501 is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_RTL8180 is not set +# CONFIG_RTL8187 is not set +# CONFIG_ADM8211 is not set +# CONFIG_P54_COMMON is not set +CONFIG_ATH5K=y +# CONFIG_ATH5K_DEBUG is not set +# CONFIG_IWLWIFI is not set +# CONFIG_IWLCORE is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWL4965 is not set +# CONFIG_IWL3945 is not set +# CONFIG_HOSTAP is not set +# CONFIG_B43 is not set +# CONFIG_B43LEGACY is not set +# CONFIG_ZD1211RW is not set +# CONFIG_RT2X00 is not set # # USB Network Adapters @@ -776,16 +1024,26 @@ CONFIG_S2IO=m # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set +CONFIG_NET_PCMCIA=y +# CONFIG_PCMCIA_3C589 is not set +# CONFIG_PCMCIA_3C574 is not set +# CONFIG_PCMCIA_FMVJ18X is not set +# CONFIG_PCMCIA_PCNET is not set +# CONFIG_PCMCIA_NMCLAN is not set +# CONFIG_PCMCIA_SMC91C92 is not set +# CONFIG_PCMCIA_XIRC2PS is not set +# CONFIG_PCMCIA_AXNET is not set # CONFIG_WAN is not set -# CONFIG_FDDI is not set +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +# CONFIG_SKFP is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y +# CONFIG_NETCONSOLE_DYNAMIC is not set CONFIG_NETPOLL=y # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y @@ -796,18 +1054,17 @@ CONFIG_NET_POLL_CONTROLLER=y # Input device support # CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set +CONFIG_INPUT_FF_MEMLESS=y +CONFIG_INPUT_POLLDEV=y # # Userland interfaces # CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set @@ -832,17 +1089,62 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +# CONFIG_JOYSTICK_XPAD is not set +CONFIG_INPUT_TABLET=y +# CONFIG_TABLET_USB_ACECAD is not set +# CONFIG_TABLET_USB_AIPTEK is not set +# CONFIG_TABLET_USB_GTCO is not set +# CONFIG_TABLET_USB_KBTAB is not set +# CONFIG_TABLET_USB_WACOM is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_UCB1400 is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +# CONFIG_INPUT_APANEL is not set +# CONFIG_INPUT_ATLAS_BTNS is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_UINPUT is not set # # Hardware I/O ports # CONFIG_SERIO=y CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_CT82C710 is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y @@ -855,8 +1157,26 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_COMPUTONE is not set +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set +# CONFIG_MOXA_INTELLIO is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_SYNCLINK_GT is not set +# CONFIG_N_HDLC is not set +# CONFIG_RISCOM8 is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set +# CONFIG_RIO is not set +# CONFIG_STALDRV is not set +# CONFIG_NOZOMI is not set # # Serial drivers @@ -866,9 +1186,14 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_PNP=y -CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_CS is not set +CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # # Non-8250 serial port support @@ -877,78 +1202,260 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_INTEL=y -CONFIG_HW_RANDOM_AMD=y -# CONFIG_NVRAM is not set -CONFIG_RTC=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +CONFIG_NVRAM=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set -CONFIG_AGP=y -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_VIA is not set -# CONFIG_DRM is not set + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set # CONFIG_MWAVE is not set # CONFIG_PC8736x_GPIO is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=256 +# CONFIG_RAW_DRIVER is not set CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set -CONFIG_HPET_MMAP=y +# CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_DEVPORT=y -# CONFIG_I2C is not set - -# -# SPI support -# +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +CONFIG_I2C_I801=y +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_TINY_USB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set # CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set # CONFIG_HWMON is not set +CONFIG_THERMAL=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_IT8712F_WDT is not set +# CONFIG_HP_WATCHDOG is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set # # Multifunction device drivers # # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set + +# +# Multimedia drivers +# CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_VIA is not set +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_RADEON is not set +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_I915=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +CONFIG_FB_DEFERRED_IO=y +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ARC is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_UVESA is not set +# CONFIG_FB_VESA is not set +CONFIG_FB_EFI=y +# CONFIG_FB_IMAC is not set +# CONFIG_FB_N411 is not set +# CONFIG_FB_HGA is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_LE80578 is not set +# CONFIG_FB_INTEL is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_GEODE is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_CORGI is not set +# CONFIG_BACKLIGHT_PROGEAR is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_FB is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256 +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_LOGO_LINUX_CLUT224=y # # Sound @@ -958,33 +1465,165 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -# CONFIG_SND is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_PCM_OSS_PLUGINS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y + +# +# Generic devices +# +# CONFIG_SND_PCSP is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5530 is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +# CONFIG_SND_HDA_POWER_SAVE is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_HIFIER is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND_USB_CAIAQ is not set + +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# System on Chip audio support +# +# CONFIG_SND_SOC is not set + +# +# ALSA SoC audio for Freescale SOCs +# + +# +# SoC Audio for the Texas Instruments OMAP +# # # Open Sound System # -CONFIG_SOUND_PRIME=y -# CONFIG_SOUND_TRIDENT is not set -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -# CONFIG_SOUND_OSS is not set +# CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y -# CONFIG_HID_DEBUG is not set +CONFIG_HID_DEBUG=y +CONFIG_HIDRAW=y # # USB Input Devices # CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set +CONFIG_USB_HIDINPUT_POWERBOOK=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_PANTHERLORD_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_ZEROPLUS_FF=y +CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y -# CONFIG_USB_DEBUG is not set +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y # # Miscellaneous USB options @@ -992,18 +1631,18 @@ CONFIG_USB=y CONFIG_USB_DEVICEFS=y # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set -# CONFIG_USB_SUSPEND is not set -# CONFIG_USB_PERSIST is not set +CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set @@ -1036,8 +1675,10 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_LIBUSUAL is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +CONFIG_USB_LIBUSUAL=y # # USB Imaging devices @@ -1049,10 +1690,6 @@ CONFIG_USB_MON=y # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -1078,98 +1715,126 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y # -# USB DSL modem support +# LED drivers # +# CONFIG_LEDS_CLEVO_MAIL is not set # -# USB Gadget Support +# LED Triggers # -# CONFIG_USB_GADGET is not set -# CONFIG_MMC is not set +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_EDAC=y # -# LED devices +# Reporting subsystems # -# CONFIG_NEW_LEDS is not set +# CONFIG_EDAC_DEBUG is not set +# CONFIG_EDAC_MM_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_DEBUG is not set # -# LED drivers +# RTC interfaces # +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set # -# LED Triggers +# I2C RTC drivers # -# CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set # -# Real Time Clock +# SPI RTC drivers # -# CONFIG_RTC_CLASS is not set # -# DMA Engine support +# Platform RTC drivers # -# CONFIG_DMA_ENGINE is not set +CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set # -# DMA Clients +# on-CPU RTC drivers # +CONFIG_DMADEVICES=y # # DMA Devices # -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set - -# -# Userspace I/O -# +# CONFIG_INTEL_IOATDMA is not set # CONFIG_UIO is not set # # Firmware Drivers # # CONFIG_EDD is not set +CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems # -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -# CONFIG_EXT2_FS_SECURITY is not set -# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y -# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_EXT3_FS_SECURITY=y # CONFIG_EXT4DEV_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set @@ -1180,7 +1845,7 @@ CONFIG_GENERIC_ACL=y # CONFIG_ISO9660_FS=y CONFIG_JOLIET=y -# CONFIG_ZISOFS is not set +CONFIG_ZISOFS=y # CONFIG_UDF_FS is not set # @@ -1198,13 +1863,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set # @@ -1212,6 +1877,7 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -1219,33 +1885,15 @@ CONFIG_RAMFS=y # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -1255,14 +1903,26 @@ CONFIG_SUNRPC=y # # Partition Types # -# CONFIG_PARTITION_ADVANCED is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set @@ -1297,40 +1957,33 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set -CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y - -# -# Distributed Lock Manager -# # CONFIG_DLM is not set # -# Instrumentation Support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y - -# # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y +# CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -# CONFIG_SCHEDSTATS is not set +CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y -# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1344,28 +1997,162 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_OPTIMIZE_INLINING=y +# CONFIG_DEBUG_SG is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set -# CONFIG_DEBUG_RODATA is not set -# CONFIG_IOMMU_DEBUG is not set +# CONFIG_LATENCYTOP is not set +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_SAMPLES is not set +# CONFIG_KGDB is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_NONPROMISC_DEVMEM is not set +CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_DEBUG_STACK_USAGE=y +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_X86_PTDUMP is not set +CONFIG_DEBUG_RODATA=y +# CONFIG_DIRECT_GBPAGES is not set +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m +CONFIG_X86_MPPARSE=y +# CONFIG_IOMMU_DEBUG is not set +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +CONFIG_DEBUG_BOOT_PARAMS=y +# CONFIG_CPA_DEBUG is not set # # Security options # -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_CRYPTO is not set +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +CONFIG_SECURITY_CAPABILITIES=y +CONFIG_SECURITY_FILE_CAPABILITIES=y +# CONFIG_SECURITY_ROOTPLUG is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_SMACK is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_SHA1=y +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_X86_64 is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_X86_64 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_X86_64 is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index b5e329da166c..20371d0635e4 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -61,6 +61,19 @@ CFI_UNDEFINED r15 .endm +#ifdef CONFIG_PARAVIRT +ENTRY(native_usergs_sysret32) + swapgs + sysretl +ENDPROC(native_usergs_sysret32) + +ENTRY(native_irq_enable_sysexit) + swapgs + sti + sysexit +ENDPROC(native_irq_enable_sysexit) +#endif + /* * 32bit SYSENTER instruction entry. * @@ -85,14 +98,14 @@ ENTRY(ia32_sysenter_target) CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp - swapgs + SWAPGS_UNSAFE_STACK movq %gs:pda_kernelstack, %rsp addq $(PDA_STACKOFFSET),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs, here we enable it straight after entry: */ - sti + ENABLE_INTERRUPTS(CLBR_NONE) movl %ebp,%ebp /* zero extension */ pushq $__USER32_DS CFI_ADJUST_CFA_OFFSET 8 @@ -103,7 +116,7 @@ ENTRY(ia32_sysenter_target) pushfq CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET rflags,0*/ - movl 8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d + movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d CFI_REGISTER rip,r10 pushq $__USER32_CS CFI_ADJUST_CFA_OFFSET 8 @@ -123,8 +136,9 @@ ENTRY(ia32_sysenter_target) .quad 1b,ia32_badarg .previous GET_THREAD_INFO(%r10) - orl $TS_COMPAT,threadinfo_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + orl $TS_COMPAT,TI_status(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ + TI_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys sysenter_do_call: @@ -134,11 +148,11 @@ sysenter_do_call: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags(%r10) jnz int_ret_from_sys_call - andl $~TS_COMPAT,threadinfo_status(%r10) + andl $~TS_COMPAT,TI_status(%r10) /* clear IF, that popfq doesn't enable interrupts early */ andl $~0x200,EFLAGS-R11(%rsp) movl RIP-R11(%rsp),%edx /* User %eip */ @@ -151,10 +165,7 @@ sysenter_do_call: CFI_ADJUST_CFA_OFFSET -8 CFI_REGISTER rsp,rcx TRACE_IRQS_ON - swapgs - sti /* sti only takes effect after the next instruction */ - /* sysexit */ - .byte 0xf, 0x35 + ENABLE_INTERRUPTS_SYSEXIT32 sysenter_tracesys: CFI_RESTORE_STATE @@ -200,7 +211,7 @@ ENTRY(ia32_cstar_target) CFI_DEF_CFA rsp,PDA_STACKOFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ - swapgs + SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 movq %gs:pda_kernelstack,%rsp @@ -208,7 +219,7 @@ ENTRY(ia32_cstar_target) * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - sti + ENABLE_INTERRUPTS(CLBR_NONE) SAVE_ARGS 8,1,1 movl %eax,%eax /* zero extension */ movq %rax,ORIG_RAX-ARGOFFSET(%rsp) @@ -230,8 +241,9 @@ ENTRY(ia32_cstar_target) .quad 1b,ia32_badarg .previous GET_THREAD_INFO(%r10) - orl $TS_COMPAT,threadinfo_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + orl $TS_COMPAT,TI_status(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ + TI_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: @@ -241,11 +253,11 @@ cstar_do_call: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags(%r10) jnz int_ret_from_sys_call - andl $~TS_COMPAT,threadinfo_status(%r10) + andl $~TS_COMPAT,TI_status(%r10) RESTORE_ARGS 1,-ARG_SKIP,1,1,1 movl RIP-ARGOFFSET(%rsp),%ecx CFI_REGISTER rip,rcx @@ -254,8 +266,7 @@ cstar_do_call: TRACE_IRQS_ON movl RSP-ARGOFFSET(%rsp),%esp CFI_RESTORE rsp - swapgs - sysretl + USERGS_SYSRET32 cstar_tracesys: CFI_RESTORE_STATE @@ -310,12 +321,12 @@ ENTRY(ia32_syscall) /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ /*CFI_REL_OFFSET cs,CS-RIP*/ CFI_REL_OFFSET rip,RIP-RIP - swapgs + SWAPGS /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - sti + ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%eax pushq %rax CFI_ADJUST_CFA_OFFSET 8 @@ -324,8 +335,9 @@ ENTRY(ia32_syscall) this could be a problem. */ SAVE_ARGS 0,0,1 GET_THREAD_INFO(%r10) - orl $TS_COMPAT,threadinfo_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + orl $TS_COMPAT,TI_status(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ + TI_flags(%r10) jnz ia32_tracesys ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax @@ -370,13 +382,11 @@ quiet_ni_syscall: PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx - PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx PTREGSCALL stub32_execve, sys32_execve, %rcx PTREGSCALL stub32_fork, sys_fork, %rdi PTREGSCALL stub32_clone, sys32_clone, %rdx PTREGSCALL stub32_vfork, sys_vfork, %rdi PTREGSCALL stub32_iopl, sys_iopl, %rsi - PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx ENTRY(ia32_ptregs_common) popq %r11 @@ -476,7 +486,7 @@ ia32_sys_call_table: .quad sys_ssetmask .quad sys_setreuid16 /* 70 */ .quad sys_setregid16 - .quad stub32_sigsuspend + .quad sys32_sigsuspend .quad compat_sys_sigpending .quad sys_sethostname .quad compat_sys_setrlimit /* 75 */ @@ -583,7 +593,7 @@ ia32_sys_call_table: .quad sys32_rt_sigpending .quad compat_sys_rt_sigtimedwait .quad sys32_rt_sigqueueinfo - .quad stub32_rt_sigsuspend + .quad sys_rt_sigsuspend .quad sys32_pread /* 180 */ .quad sys32_pwrite .quad sys_chown16 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 77807d4769c9..59b14c940a28 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds +extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) @@ -18,15 +18,15 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o +obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o +obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o -obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += bootflag.o e820_$(BITS).o +obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o +obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o -obj-$(CONFIG_X86_64) += bugs_64.o -obj-y += tsc_$(BITS).o io_delay.o rtc.o +obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o @@ -53,7 +53,7 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o +obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o @@ -64,7 +64,6 @@ obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o -obj-$(CONFIG_ACPI_SRAT) += srat_32.o obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o @@ -94,12 +93,13 @@ obj-$(CONFIG_OLPC) += olpc.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o + obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 33c5216fd3e1..a31a579a47ca 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -37,6 +37,7 @@ #include <asm/pgtable.h> #include <asm/io_apic.h> #include <asm/apic.h> +#include <asm/genapic.h> #include <asm/io.h> #include <asm/mpspec.h> #include <asm/smp.h> @@ -83,6 +84,8 @@ int acpi_lapic; int acpi_ioapic; int acpi_strict; +static int disable_irq0_through_ioapic __initdata; + u8 acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; int acpi_skip_timer_override __initdata; @@ -106,21 +109,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; */ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; -#ifdef CONFIG_X86_64 - -/* rely on all ACPI tables being in the direct mapping */ -char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) -{ - if (!phys_addr || !size) - return NULL; - - if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE) - return __va(phys_addr); - - return NULL; -} - -#else /* * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, @@ -139,11 +127,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) unsigned long base, offset, mapped_size; int idx; - if (phys + size < 8 * 1024 * 1024) + if (!phys || !size) + return NULL; + + if (phys+size <= (max_pfn_mapped << PAGE_SHIFT)) return __va(phys); offset = phys & (PAGE_SIZE - 1); mapped_size = PAGE_SIZE - offset; + clear_fixmap(FIX_ACPI_END); set_fixmap(FIX_ACPI_END, phys); base = fix_to_virt(FIX_ACPI_END); @@ -155,13 +147,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) if (--idx < FIX_ACPI_BEGIN) return NULL; /* cannot handle this */ phys += PAGE_SIZE; + clear_fixmap(idx); set_fixmap(idx, phys); mapped_size += PAGE_SIZE; } return ((unsigned char *)base + offset); } -#endif #ifdef CONFIG_PCI_MMCONFIG /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ @@ -338,8 +330,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC -struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; - static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -514,8 +504,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - extern void eisa_set_level_irq(unsigned int irq); - if (triggering == ACPI_LEVEL_SENSITIVE) eisa_set_level_irq(gsi); } @@ -860,6 +848,372 @@ static int __init acpi_parse_madt_lapic_entries(void) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC +#define MP_ISA_BUS 0 + +#ifdef CONFIG_X86_ES7000 +extern int es7000_plat; +#endif + +static struct { + int apic_id; + int gsi_base; + int gsi_end; + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} mp_ioapic_routing[MAX_IO_APICS]; + +static int mp_find_ioapic(int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. */ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_ioapic_routing[i].gsi_base) + && (gsi <= mp_ioapic_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + return -1; +} + +static u8 __init uniq_ioapic_id(u8 id) +{ +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else + int i; + DECLARE_BITMAP(used, 256); + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mp_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mp_apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif +} + +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +{ + int idx = 0; + + if (bad_ioapic(address)) + return; + + idx = nr_ioapics; + + mp_ioapics[idx].mp_type = MP_IOAPIC; + mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mp_apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); +#ifdef CONFIG_X86_32 + mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); +#else + mp_ioapics[idx].mp_apicver = 0; +#endif + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; + mp_ioapic_routing[idx].gsi_base = gsi_base; + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, + mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); + + nr_ioapics++; +} + +static void assign_to_mp_irq(struct mp_config_intsrc *m, + struct mp_config_intsrc *mp_irq) +{ + memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); +} + +static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, + struct mp_config_intsrc *m) +{ + return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); +} + +static void save_mp_irq(struct mp_config_intsrc *m) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + if (!mp_irq_cmp(&mp_irqs[i], m)) + return; + } + + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) +{ + int ioapic; + int pin; + struct mp_config_intsrc mp_irq; + + /* Skip the 8254 timer interrupt (IRQ 0) if requested. */ + if (bus_irq == 0 && disable_irq0_through_ioapic) + return; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; + pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! + */ + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + + mp_irq.mp_type = MP_INTSRC; + mp_irq.mp_irqtype = mp_INT; + mp_irq.mp_irqflag = (trigger << 2) | polarity; + mp_irq.mp_srcbus = MP_ISA_BUS; + mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ + mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ + mp_irq.mp_dstirq = pin; /* INTIN# */ + + save_mp_irq(&mp_irq); +} + +void __init mp_config_acpi_legacy_irqs(void) +{ + int i; + int ioapic; + unsigned int dstapic; + struct mp_config_intsrc mp_irq; + +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) + /* + * Fabricate the legacy ISA bus (bus #31). + */ + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; +#endif + set_bit(MP_ISA_BUS, mp_bus_not_pci); + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); + +#ifdef CONFIG_X86_ES7000 + /* + * Older generations of ES7000 have no legacy identity mappings + */ + if (es7000_plat == 1) + return; +#endif + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). + */ + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + return; + dstapic = mp_ioapics[ioapic].mp_apicid; + + /* + * Use the default configuration for the IRQs 0-15. Unless + * overridden by (MADT) interrupt source override entries. + */ + for (i = 0; i < 16; i++) { + int idx; + + /* Skip the 8254 timer interrupt (IRQ 0) if requested. */ + if (i == 0 && disable_irq0_through_ioapic) + continue; + + for (idx = 0; idx < mp_irq_entries; idx++) { + struct mp_config_intsrc *irq = mp_irqs + idx; + + /* Do we already have a mapping for this ISA IRQ? */ + if (irq->mp_srcbus == MP_ISA_BUS + && irq->mp_srcbusirq == i) + break; + + /* Do we already have a mapping for this IOAPIC pin */ + if (irq->mp_dstapic == dstapic && + irq->mp_dstirq == i) + break; + } + + if (idx != mp_irq_entries) { + printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); + continue; /* IRQ already used */ + } + + mp_irq.mp_type = MP_INTSRC; + mp_irq.mp_irqflag = 0; /* Conforming */ + mp_irq.mp_srcbus = MP_ISA_BUS; + mp_irq.mp_dstapic = dstapic; + mp_irq.mp_irqtype = mp_INT; + mp_irq.mp_srcbusirq = i; /* Identity mapped */ + mp_irq.mp_dstirq = i; + + save_mp_irq(&mp_irq); + } +} + +int mp_register_gsi(u32 gsi, int triggering, int polarity) +{ + int ioapic; + int ioapic_pin; +#ifdef CONFIG_X86_32 +#define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 + + static int pci_irq = IRQ_COMPRESSION_START; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; +#else + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; +#endif + + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); + return gsi; + } + + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + +#ifdef CONFIG_X86_32 + if (ioapic_renumber_irq) + gsi = ioapic_renumber_irq(ioapic, gsi); +#endif + + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + if (ioapic_pin > MP_MAX_IOAPIC_PIN) { + printk(KERN_ERR "Invalid reference to IOAPIC pin " + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); + return gsi; + } + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", + mp_ioapic_routing[ioapic].apic_id, ioapic_pin); +#ifdef CONFIG_X86_32 + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); +#else + return gsi; +#endif + } + + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); +#ifdef CONFIG_X86_32 + /* + * For GSI >= 64, use IRQ compression + */ + if ((gsi >= IRQ_COMPRESSION_START) + && (triggering == ACPI_LEVEL_SENSITIVE)) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + gsi = pci_irq++; + /* + * Don't assign IRQ used by ACPI SCI + */ + if (gsi == acpi_gbl_FADT.sci_interrupt) + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } +#endif + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + return gsi; +} + +int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity) +{ +#ifdef CONFIG_X86_MPPARSE + struct mp_config_intsrc mp_irq; + int ioapic; + + if (!acpi_ioapic) + return 0; + + /* print the entry should happen on mptable identically */ + mp_irq.mp_type = MP_INTSRC; + mp_irq.mp_irqtype = mp_INT; + mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); + mp_irq.mp_srcbus = number; + mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); + mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; + mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + + save_mp_irq(&mp_irq); +#endif + return 0; +} + /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error @@ -1009,8 +1363,6 @@ static void __init acpi_process_madt(void) return; } -#ifdef __i386__ - static int __init disable_acpi_irq(const struct dmi_system_id *d) { if (!acpi_force) { @@ -1061,6 +1413,28 @@ static int __init force_acpi_ht(const struct dmi_system_id *d) } /* + * Don't register any I/O APIC entries for the 8254 timer IRQ. + */ +static int __init +dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d) +{ + pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident); + disable_irq0_through_ioapic = 1; + return 0; +} + +/* + * Force ignoring BIOS IRQ0 pin2 override + */ +static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) +{ + pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident); + acpi_skip_timer_override = 1; + force_mask_ioapic_irq_2(); + return 0; +} + +/* * If your system is blacklisted here, but you find that acpi=force * works for you, please contact [email protected] */ @@ -1227,11 +1601,61 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * HP laptops which use a DSDT reporting as HP/SB400/10000, + * which includes some code which overrides all temperature + * trip points to 16C if the INTIN2 input of the I/O APIC + * is enabled. This input is incorrectly designated the + * ISA IRQ 0 via an interrupt source override even though + * it is wired to the output of the master 8259A and INTIN0 + * is not connected at all. Abandon any attempts to route + * IRQ 0 through the I/O APIC therefore. + */ + { + .callback = dmi_disable_irq0_through_ioapic, + .ident = "HP NX6125 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"), + }, + }, + { + .callback = dmi_disable_irq0_through_ioapic, + .ident = "HP NX6325 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), + }, + }, + /* + * HP laptops which use a DSDT reporting as HP/SB400/10000, + * which includes some code which overrides all temperature + * trip points to 16C if the INTIN2 input of the I/O APIC + * is enabled. This input is incorrectly designated the + * ISA IRQ 0 via an interrupt source override even though + * it is wired to the output of the master 8259A and INTIN0 + * is not connected at all. Force ignoring BIOS IRQ0 pin2 + * override in that cases. + */ + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP NX6125 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"), + }, + }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP NX6325 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), + }, + }, {} }; -#endif /* __i386__ */ - /* * acpi_boot_table_init() and acpi_boot_init() * called from setup_arch(), always. @@ -1259,9 +1683,7 @@ int __init acpi_boot_table_init(void) { int error; -#ifdef __i386__ dmi_check_system(acpi_dmi_table); -#endif /* * If acpi_disabled, bail out @@ -1386,6 +1808,20 @@ static int __init parse_pci(char *arg) } early_param("pci", parse_pci); +int __init acpi_mps_check(void) +{ +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE) +/* mptable code is not built-in*/ + if (acpi_disabled || acpi_noirq) { + printk(KERN_WARNING "MPS support code is not built-in.\n" + "Using acpi=off or acpi=noirq or pci=noacpi " + "may have problem\n"); + return 1; + } +#endif + return 0; +} + #ifdef CONFIG_X86_IO_APIC static int __init parse_acpi_skip_timer_override(char *arg) { diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index f9b77fb37e5b..3355973b12ac 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -5,6 +5,7 @@ #include <asm/msr-index.h> #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/processor-flags.h> .code16 .section ".header", "a" @@ -24,6 +25,11 @@ pmode_gdt: .quad 0 realmode_flags: .long 0 real_magic: .long 0 trampoline_segment: .word 0 +_pad1: .byte 0 +wakeup_jmp: .byte 0xea /* ljmpw */ +wakeup_jmp_off: .word 3f +wakeup_jmp_seg: .word 0 +wakeup_gdt: .quad 0, 0, 0 signature: .long 0x51ee1111 .text @@ -34,11 +40,34 @@ _start: cli cld + /* Apparently some dimwit BIOS programmers don't know how to + program a PM to RM transition, and we might end up here with + junk in the data segment descriptor registers. The only way + to repair that is to go into PM and fix it ourselves... */ + movw $16, %cx + lgdtl %cs:wakeup_gdt + movl %cr0, %eax + orb $X86_CR0_PE, %al + movl %eax, %cr0 + jmp 1f +1: ljmpw $8, $2f +2: + movw %cx, %ds + movw %cx, %es + movw %cx, %ss + movw %cx, %fs + movw %cx, %gs + + andb $~X86_CR0_PE, %al + movl %eax, %cr0 + jmp wakeup_jmp +3: /* Set up segments */ movw %cs, %ax movw %ax, %ds movw %ax, %es movw %ax, %ss + lidtl wakeup_idt movl $wakeup_stack_end, %esp @@ -98,7 +127,14 @@ bogus_real_magic: jmp 1b .data - .balign 4 + .balign 8 + + /* This is the standard real-mode IDT */ +wakeup_idt: + .word 0xffff /* limit */ + .long 0 /* address */ + .word 0 + .globl HEAP, heap_end HEAP: .long wakeup_heap diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h index ef8166fe8020..69d38d0b2b64 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/kernel/acpi/realmode/wakeup.h @@ -24,6 +24,11 @@ struct wakeup_header { u32 realmode_flags; u32 real_magic; u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ + u8 _pad1; + u8 wakeup_jmp; + u16 wakeup_jmp_off; + u16 wakeup_jmp_seg; + u64 wakeup_gdt[3]; u32 signature; /* To check we have correct structure */ } __attribute__((__packed__)); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index afc25ee9964b..e6a4b564ccaa 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -50,6 +50,20 @@ int acpi_save_state_mem(void) header->video_mode = saved_video_mode; + header->wakeup_jmp_seg = acpi_wakeup_address >> 4; + /* GDT[0]: GDT self-pointer */ + header->wakeup_gdt[0] = + (u64)(sizeof(header->wakeup_gdt) - 1) + + ((u64)(acpi_wakeup_address + + ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) + << 16); + /* GDT[1]: real-mode-like code segment */ + header->wakeup_gdt[1] = (0x009bULL << 40) + + ((u64)acpi_wakeup_address << 16) + 0xffff; + /* GDT[2]: real-mode-like data segment */ + header->wakeup_gdt[2] = (0x0093ULL << 40) + + ((u64)acpi_wakeup_address << 16) + 0xffff; + #ifndef CONFIG_64BIT store_gdt((struct desc_ptr *)&header->pmode_gdt); @@ -72,7 +86,9 @@ int acpi_save_state_mem(void) saved_magic = 0x12345678; #else /* CONFIG_64BIT */ header->trampoline_segment = setup_trampoline() >> 4; - init_rsp = (unsigned long)temp_stack + 4096; +#ifdef CONFIG_SMP + stack_start.sp = temp_stack + 4096; +#endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; #endif /* CONFIG_64BIT */ @@ -111,7 +127,7 @@ void __init acpi_reserve_bootmem(void) return; } - acpi_wakeup_address = acpi_realmode; + acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c new file mode 100644 index 000000000000..f2766d84c7a0 --- /dev/null +++ b/arch/x86/kernel/amd_iommu.c @@ -0,0 +1,962 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <[email protected]> + * Leo Duran <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/bitops.h> +#include <linux/scatterlist.h> +#include <linux/iommu-helper.h> +#include <asm/proto.h> +#include <asm/gart.h> +#include <asm/amd_iommu_types.h> +#include <asm/amd_iommu.h> + +#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) + +#define to_pages(addr, size) \ + (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) + +static DEFINE_RWLOCK(amd_iommu_devtable_lock); + +struct command { + u32 data[4]; +}; + +static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e); + +static int iommu_has_npcache(struct amd_iommu *iommu) +{ + return iommu->cap & IOMMU_CAP_NPCACHE; +} + +static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +{ + u32 tail, head; + u8 *target; + + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + target = (iommu->cmd_buf + tail); + memcpy_toio(target, cmd, sizeof(*cmd)); + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + if (tail == head) + return -ENOMEM; + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + return 0; +} + +static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&iommu->lock, flags); + ret = __iommu_queue_command(iommu, cmd); + spin_unlock_irqrestore(&iommu->lock, flags); + + return ret; +} + +static int iommu_completion_wait(struct amd_iommu *iommu) +{ + int ret; + struct command cmd; + volatile u64 ready = 0; + unsigned long ready_phys = virt_to_phys(&ready); + + memset(&cmd, 0, sizeof(cmd)); + cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; + cmd.data[1] = HIGH_U32(ready_phys); + cmd.data[2] = 1; /* value written to 'ready' */ + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + + iommu->need_sync = 0; + + ret = iommu_queue_command(iommu, &cmd); + + if (ret) + return ret; + + while (!ready) + cpu_relax(); + + return 0; +} + +static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) +{ + struct command cmd; + + BUG_ON(iommu == NULL); + + memset(&cmd, 0, sizeof(cmd)); + CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); + cmd.data[0] = devid; + + iommu->need_sync = 1; + + return iommu_queue_command(iommu, &cmd); +} + +static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, + u64 address, u16 domid, int pde, int s) +{ + struct command cmd; + + memset(&cmd, 0, sizeof(cmd)); + address &= PAGE_MASK; + CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); + cmd.data[1] |= domid; + cmd.data[2] = LOW_U32(address); + cmd.data[3] = HIGH_U32(address); + if (s) + cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) + cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + + iommu->need_sync = 1; + + return iommu_queue_command(iommu, &cmd); +} + +static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, + u64 address, size_t size) +{ + int s = 0; + unsigned pages = to_pages(address, size); + + address &= PAGE_MASK; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); + + return 0; +} + +static int iommu_map(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + int prot) +{ + u64 __pte, *pte, *page; + + bus_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(bus_addr); + + /* only support 512GB address spaces for now */ + if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + *pte = IOMMU_L2_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + *pte = IOMMU_L1_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)]; + + if (IOMMU_PTE_PRESENT(*pte)) + return -EBUSY; + + __pte = phys_addr | IOMMU_PTE_P; + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + *pte = __pte; + + return 0; +} + +static int iommu_for_unity_map(struct amd_iommu *iommu, + struct unity_map_entry *entry) +{ + u16 bdf, i; + + for (i = entry->devid_start; i <= entry->devid_end; ++i) { + bdf = amd_iommu_alias_table[i]; + if (amd_iommu_rlookup_table[bdf] == iommu) + return 1; + } + + return 0; +} + +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + +static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e) +{ + u64 addr; + int ret; + + for (addr = e->address_start; addr < e->address_end; + addr += PAGE_SIZE) { + ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); + if (ret) + return ret; + /* + * if unity mapping is in aperture range mark the page + * as allocated in the aperture + */ + if (addr < dma_dom->aperture_size) + __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); + } + + return 0; +} + +static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, + u16 devid) +{ + struct unity_map_entry *e; + int ret; + + list_for_each_entry(e, &amd_iommu_unity_map, list) { + if (!(devid >= e->devid_start && devid <= e->devid_end)) + continue; + ret = dma_ops_unity_map(dma_dom, e); + if (ret) + return ret; + } + + return 0; +} + +static unsigned long dma_mask_to_pages(unsigned long mask) +{ + return (mask >> PAGE_SHIFT) + + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); +} + +static unsigned long dma_ops_alloc_addresses(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages) +{ + unsigned long limit = dma_mask_to_pages(*dev->dma_mask); + unsigned long address; + unsigned long size = dom->aperture_size >> PAGE_SHIFT; + unsigned long boundary_size; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + limit = limit < size ? limit : size; + + if (dom->next_bit >= limit) + dom->next_bit = 0; + + address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, + 0 , boundary_size, 0); + if (address == -1) + address = iommu_area_alloc(dom->bitmap, limit, 0, pages, + 0, boundary_size, 0); + + if (likely(address != -1)) { + dom->next_bit = address + pages; + address <<= PAGE_SHIFT; + } else + address = bad_dma_address; + + WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); + + return address; +} + +static void dma_ops_free_addresses(struct dma_ops_domain *dom, + unsigned long address, + unsigned int pages) +{ + address >>= PAGE_SHIFT; + iommu_area_free(dom->bitmap, address, pages); +} + +static u16 domain_id_alloc(void) +{ + unsigned long flags; + int id; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); + BUG_ON(id == 0); + if (id > 0 && id < MAX_DOMAIN_ID) + __set_bit(id, amd_iommu_pd_alloc_bitmap); + else + id = 0; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return id; +} + +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) +{ + unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; + + if (start_page + pages > last_page) + pages = last_page - start_page; + + set_bit_string(dom->bitmap, start_page, pages); +} + +static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) +{ + int i, j; + u64 *p1, *p2, *p3; + + p1 = dma_dom->domain.pt_root; + + if (!p1) + return; + + for (i = 0; i < 512; ++i) { + if (!IOMMU_PTE_PRESENT(p1[i])) + continue; + + p2 = IOMMU_PTE_PAGE(p1[i]); + for (j = 0; j < 512; ++i) { + if (!IOMMU_PTE_PRESENT(p2[j])) + continue; + p3 = IOMMU_PTE_PAGE(p2[j]); + free_page((unsigned long)p3); + } + + free_page((unsigned long)p2); + } + + free_page((unsigned long)p1); +} + +static void dma_ops_domain_free(struct dma_ops_domain *dom) +{ + if (!dom) + return; + + dma_ops_free_pagetable(dom); + + kfree(dom->pte_pages); + + kfree(dom->bitmap); + + kfree(dom); +} + +static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, + unsigned order) +{ + struct dma_ops_domain *dma_dom; + unsigned i, num_pte_pages; + u64 *l2_pde; + u64 address; + + /* + * Currently the DMA aperture must be between 32 MB and 1GB in size + */ + if ((order < 25) || (order > 30)) + return NULL; + + dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); + if (!dma_dom) + return NULL; + + spin_lock_init(&dma_dom->domain.lock); + + dma_dom->domain.id = domain_id_alloc(); + if (dma_dom->domain.id == 0) + goto free_dma_dom; + dma_dom->domain.mode = PAGE_MODE_3_LEVEL; + dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); + dma_dom->domain.priv = dma_dom; + if (!dma_dom->domain.pt_root) + goto free_dma_dom; + dma_dom->aperture_size = (1ULL << order); + dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8), + GFP_KERNEL); + if (!dma_dom->bitmap) + goto free_dma_dom; + /* + * mark the first page as allocated so we never return 0 as + * a valid dma-address. So we can use 0 as error value + */ + dma_dom->bitmap[0] = 1; + dma_dom->next_bit = 0; + + if (iommu->exclusion_start && + iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; + int pages = to_pages(iommu->exclusion_start, + iommu->exclusion_length); + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } + + num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); + dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), + GFP_KERNEL); + if (!dma_dom->pte_pages) + goto free_dma_dom; + + l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL); + if (l2_pde == NULL) + goto free_dma_dom; + + dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); + + for (i = 0; i < num_pte_pages; ++i) { + dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!dma_dom->pte_pages[i]) + goto free_dma_dom; + address = virt_to_phys(dma_dom->pte_pages[i]); + l2_pde[i] = IOMMU_L1_PDE(address); + } + + return dma_dom; + +free_dma_dom: + dma_ops_domain_free(dma_dom); + + return NULL; +} + +static struct protection_domain *domain_for_device(u16 devid) +{ + struct protection_domain *dom; + unsigned long flags; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = amd_iommu_pd_table[devid]; + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; +} + +static void set_device_domain(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) +{ + unsigned long flags; + + u64 pte_root = virt_to_phys(domain->pt_root); + + pte_root |= (domain->mode & 0x07) << 9; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + amd_iommu_dev_table[devid].data[0] = pte_root; + amd_iommu_dev_table[devid].data[1] = pte_root >> 32; + amd_iommu_dev_table[devid].data[2] = domain->id; + + amd_iommu_pd_table[devid] = domain; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + iommu_queue_inv_dev_entry(iommu, devid); + + iommu->need_sync = 1; +} + +static int get_device_resources(struct device *dev, + struct amd_iommu **iommu, + struct protection_domain **domain, + u16 *bdf) +{ + struct dma_ops_domain *dma_dom; + struct pci_dev *pcidev; + u16 _bdf; + + BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); + + pcidev = to_pci_dev(dev); + _bdf = (pcidev->bus->number << 8) | pcidev->devfn; + + if (_bdf >= amd_iommu_last_bdf) { + *iommu = NULL; + *domain = NULL; + *bdf = 0xffff; + return 0; + } + + *bdf = amd_iommu_alias_table[_bdf]; + + *iommu = amd_iommu_rlookup_table[*bdf]; + if (*iommu == NULL) + return 0; + dma_dom = (*iommu)->default_dom; + *domain = domain_for_device(*bdf); + if (*domain == NULL) { + *domain = &dma_dom->domain; + set_device_domain(*iommu, *domain, *bdf); + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " + "device ", (*domain)->id); + print_devid(_bdf, 1); + } + + return 1; +} + +static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, + struct dma_ops_domain *dom, + unsigned long address, + phys_addr_t paddr, + int direction) +{ + u64 *pte, __pte; + + WARN_ON(address > dom->aperture_size); + + paddr &= PAGE_MASK; + + pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte += IOMMU_PTE_L0_INDEX(address); + + __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; + + if (direction == DMA_TO_DEVICE) + __pte |= IOMMU_PTE_IR; + else if (direction == DMA_FROM_DEVICE) + __pte |= IOMMU_PTE_IW; + else if (direction == DMA_BIDIRECTIONAL) + __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; + + WARN_ON(*pte); + + *pte = __pte; + + return (dma_addr_t)address; +} + +static void dma_ops_domain_unmap(struct amd_iommu *iommu, + struct dma_ops_domain *dom, + unsigned long address) +{ + u64 *pte; + + if (address >= dom->aperture_size) + return; + + WARN_ON(address & 0xfffULL || address > dom->aperture_size); + + pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte += IOMMU_PTE_L0_INDEX(address); + + WARN_ON(!*pte); + + *pte = 0ULL; +} + +static dma_addr_t __map_single(struct device *dev, + struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, + phys_addr_t paddr, + size_t size, + int dir) +{ + dma_addr_t offset = paddr & ~PAGE_MASK; + dma_addr_t address, start; + unsigned int pages; + int i; + + pages = to_pages(paddr, size); + paddr &= PAGE_MASK; + + address = dma_ops_alloc_addresses(dev, dma_dom, pages); + if (unlikely(address == bad_dma_address)) + goto out; + + start = address; + for (i = 0; i < pages; ++i) { + dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + paddr += PAGE_SIZE; + start += PAGE_SIZE; + } + address += offset; + +out: + return address; +} + +static void __unmap_single(struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, + dma_addr_t dma_addr, + size_t size, + int dir) +{ + dma_addr_t i, start; + unsigned int pages; + + if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + return; + + pages = to_pages(dma_addr, size); + dma_addr &= PAGE_MASK; + start = dma_addr; + + for (i = 0; i < pages; ++i) { + dma_ops_domain_unmap(iommu, dma_dom, start); + start += PAGE_SIZE; + } + + dma_ops_free_addresses(dma_dom, dma_addr, pages); +} + +static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + dma_addr_t addr; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (iommu == NULL || domain == NULL) + return (dma_addr_t)paddr; + + spin_lock_irqsave(&domain->lock, flags); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); + if (addr == bad_dma_address) + goto out; + + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return addr; +} + +static void unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + + if (!get_device_resources(dev, &iommu, &domain, &devid)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(iommu, domain->priv, dma_addr, size, dir); + + iommu_flush_pages(iommu, domain->id, dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sglist, s, nelems, i) { + s->dma_address = (dma_addr_t)sg_phys(s); + s->dma_length = s->length; + } + + return nelems; +} + +static int map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + int i; + struct scatterlist *s; + phys_addr_t paddr; + int mapped_elems = 0; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) + return map_sg_no_iommu(dev, sglist, nelems, dir); + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + paddr = sg_phys(s); + + s->dma_address = __map_single(dev, iommu, domain->priv, + paddr, s->length, dir); + + if (s->dma_address) { + s->dma_length = s->length; + mapped_elems++; + } else + goto unmap; + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, s->dma_address, + s->dma_length); + } + + if (iommu->need_sync) + iommu_completion_wait(iommu); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return mapped_elems; +unmap: + for_each_sg(sglist, s, mapped_elems, i) { + if (s->dma_address) + __unmap_single(iommu, domain->priv, s->dma_address, + s->dma_length, dir); + s->dma_address = s->dma_length = 0; + } + + mapped_elems = 0; + + goto out; +} + +static void unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + struct scatterlist *s; + u16 devid; + int i; + + if (!get_device_resources(dev, &iommu, &domain, &devid)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + __unmap_single(iommu, domain->priv, s->dma_address, + s->dma_length, dir); + iommu_flush_pages(iommu, domain->id, s->dma_address, + s->dma_length); + s->dma_address = s->dma_length = 0; + } + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void *alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) +{ + unsigned long flags; + void *virt_addr; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + phys_addr_t paddr; + + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + if (!virt_addr) + return 0; + + memset(virt_addr, 0, size); + paddr = virt_to_phys(virt_addr); + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) { + *dma_addr = (dma_addr_t)paddr; + return virt_addr; + } + + spin_lock_irqsave(&domain->lock, flags); + + *dma_addr = __map_single(dev, iommu, domain->priv, paddr, + size, DMA_BIDIRECTIONAL); + + if (*dma_addr == bad_dma_address) { + free_pages((unsigned long)virt_addr, get_order(size)); + virt_addr = NULL; + goto out; + } + + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, *dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return virt_addr; +} + +static void free_coherent(struct device *dev, size_t size, + void *virt_addr, dma_addr_t dma_addr) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) + goto free_mem; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + iommu_flush_pages(iommu, domain->id, dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); + +free_mem: + free_pages((unsigned long)virt_addr, get_order(size)); +} + +/* + * If the driver core informs the DMA layer if a driver grabs a device + * we don't need to preallocate the protection domains anymore. + * For now we have to. + */ +void prealloc_protection_domains(void) +{ + struct pci_dev *dev = NULL; + struct dma_ops_domain *dma_dom; + struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + u16 devid; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + devid = (dev->bus->number << 8) | dev->devfn; + if (devid >= amd_iommu_last_bdf) + continue; + devid = amd_iommu_alias_table[devid]; + if (domain_for_device(devid)) + continue; + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + continue; + dma_dom = dma_ops_domain_alloc(iommu, order); + if (!dma_dom) + continue; + init_unity_mappings_for_device(dma_dom, devid); + set_device_domain(iommu, &dma_dom->domain, devid); + printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", + dma_dom->domain.id); + print_devid(devid, 1); + } +} + +static struct dma_mapping_ops amd_iommu_dma_ops = { + .alloc_coherent = alloc_coherent, + .free_coherent = free_coherent, + .map_single = map_single, + .unmap_single = unmap_single, + .map_sg = map_sg, + .unmap_sg = unmap_sg, +}; + +int __init amd_iommu_init_dma_ops(void) +{ + struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + int ret; + + list_for_each_entry(iommu, &amd_iommu_list, list) { + iommu->default_dom = dma_ops_domain_alloc(iommu, order); + if (iommu->default_dom == NULL) + return -ENOMEM; + ret = iommu_init_unity_mappings(iommu); + if (ret) + goto free_domains; + } + + if (amd_iommu_isolate) + prealloc_protection_domains(); + + iommu_detected = 1; + force_iommu = 1; + bad_dma_address = 0; +#ifdef CONFIG_GART_IOMMU + gart_iommu_aperture_disabled = 1; + gart_iommu_aperture = 0; +#endif + + dma_ops = &amd_iommu_dma_ops; + + return 0; + +free_domains: + + list_for_each_entry(iommu, &amd_iommu_list, list) { + if (iommu->default_dom) + dma_ops_domain_free(iommu->default_dom); + } + + return ret; +} diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c new file mode 100644 index 000000000000..2a13e430437d --- /dev/null +++ b/arch/x86/kernel/amd_iommu_init.c @@ -0,0 +1,875 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <[email protected]> + * Leo Duran <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/pci.h> +#include <linux/acpi.h> +#include <linux/gfp.h> +#include <linux/list.h> +#include <linux/sysdev.h> +#include <asm/pci-direct.h> +#include <asm/amd_iommu_types.h> +#include <asm/amd_iommu.h> +#include <asm/gart.h> + +/* + * definitions for the ACPI scanning code + */ +#define UPDATE_LAST_BDF(x) do {\ + if ((x) > amd_iommu_last_bdf) \ + amd_iommu_last_bdf = (x); \ + } while (0); + +#define DEVID(bus, devfn) (((bus) << 8) | (devfn)) +#define PCI_BUS(x) (((x) >> 8) & 0xff) +#define IVRS_HEADER_LENGTH 48 +#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) + +#define ACPI_IVHD_TYPE 0x10 +#define ACPI_IVMD_TYPE_ALL 0x20 +#define ACPI_IVMD_TYPE 0x21 +#define ACPI_IVMD_TYPE_RANGE 0x22 + +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_ALIAS_RANGE 0x43 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_EXT_SELECT_RANGE 0x47 + +#define IVHD_FLAG_HT_TUN_EN 0x00 +#define IVHD_FLAG_PASSPW_EN 0x01 +#define IVHD_FLAG_RESPASSPW_EN 0x02 +#define IVHD_FLAG_ISOC_EN 0x03 + +#define IVMD_FLAG_EXCL_RANGE 0x08 +#define IVMD_FLAG_UNITY_MAP 0x01 + +#define ACPI_DEVFLAG_INITPASS 0x01 +#define ACPI_DEVFLAG_EXTINT 0x02 +#define ACPI_DEVFLAG_NMI 0x04 +#define ACPI_DEVFLAG_SYSMGT1 0x10 +#define ACPI_DEVFLAG_SYSMGT2 0x20 +#define ACPI_DEVFLAG_LINT0 0x40 +#define ACPI_DEVFLAG_LINT1 0x80 +#define ACPI_DEVFLAG_ATSDIS 0x10000000 + +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 reserved; +} __attribute__((packed)); + +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; +} __attribute__((packed)); + +struct ivmd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 aux; + u64 resv; + u64 range_start; + u64 range_length; +} __attribute__((packed)); + +static int __initdata amd_iommu_detected; + +u16 amd_iommu_last_bdf; +struct list_head amd_iommu_unity_map; +unsigned amd_iommu_aperture_order = 26; +int amd_iommu_isolate; + +struct list_head amd_iommu_list; +struct dev_table_entry *amd_iommu_dev_table; +u16 *amd_iommu_alias_table; +struct amd_iommu **amd_iommu_rlookup_table; +struct protection_domain **amd_iommu_pd_table; +unsigned long *amd_iommu_pd_alloc_bitmap; + +static u32 dev_table_size; +static u32 alias_table_size; +static u32 rlookup_table_size; + +static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) +{ + u64 start = iommu->exclusion_start & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 entry; + + if (!iommu->exclusion_start) + return; + + entry = start | MMIO_EXCL_ENABLE_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, + &entry, sizeof(entry)); + + entry = limit; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, + &entry, sizeof(entry)); +} + +static void __init iommu_set_device_table(struct amd_iommu *iommu) +{ + u32 entry; + + BUG_ON(iommu->mmio_base == NULL); + + entry = virt_to_phys(amd_iommu_dev_table); + entry |= (dev_table_size >> 12) - 1; + memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, + &entry, sizeof(entry)); +} + +static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl |= (1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~(1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +void __init iommu_enable(struct amd_iommu *iommu) +{ + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); + print_devid(iommu->devid, 0); + printk(" cap 0x%hx\n", iommu->cap_ptr); + + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); +} + +static u8 * __init iommu_map_mmio_space(u64 address) +{ + u8 *ret; + + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) + return NULL; + + ret = ioremap_nocache(address, MMIO_REGION_LENGTH); + if (ret != NULL) + return ret; + + release_mem_region(address, MMIO_REGION_LENGTH); + + return NULL; +} + +static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) +{ + if (iommu->mmio_base) + iounmap(iommu->mmio_base); + release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); +} + +static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) +{ + u32 cap; + + cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + + return 0; +} + +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + p += sizeof(*h); + end += h->length; + + find_last_devid_on_pci(PCI_BUS(h->devid), + PCI_SLOT(h->devid), + PCI_FUNC(h->devid), + h->cap_ptr); + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + UPDATE_LAST_BDF(dev->devid); + break; + default: + break; + } + p += 0x04 << (*p >> 6); + } + + WARN_ON(p != end); + + return 0; +} + +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) + /* ACPI table corrupt */ + return -ENODEV; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + switch (h->type) { + case ACPI_IVHD_TYPE: + find_last_devid_from_ivhd(h); + break; + default: + break; + } + p += h->length; + } + WARN_ON(p != end); + + return 0; +} + +static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +{ + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, + get_order(CMD_BUFFER_SIZE)); + u64 entry = 0; + + if (cmd_buf == NULL) + return NULL; + + iommu->cmd_buf_size = CMD_BUFFER_SIZE; + + memset(cmd_buf, 0, CMD_BUFFER_SIZE); + + entry = (u64)virt_to_phys(cmd_buf); + entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); + + return cmd_buf; +} + +static void __init free_command_buffer(struct amd_iommu *iommu) +{ + if (iommu->cmd_buf) + free_pages((unsigned long)iommu->cmd_buf, + get_order(CMD_BUFFER_SIZE)); +} + +static void set_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + amd_iommu_dev_table[devid].data[i] |= (1 << _bit); +} + +static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) +{ + if (flags & ACPI_DEVFLAG_INITPASS) + set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); +} + +static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +{ + amd_iommu_rlookup_table[devid] = iommu; +} + +static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) + return; + + if (iommu) { + set_dev_entry_bit(m->devid, DEV_ENTRY_EX); + iommu->exclusion_start = m->range_start; + iommu->exclusion_length = m->range_length; + } +} + +static void __init init_iommu_from_pci(struct amd_iommu *iommu) +{ + int bus = PCI_BUS(iommu->devid); + int dev = PCI_SLOT(iommu->devid); + int fn = PCI_FUNC(iommu->devid); + int cap_ptr = iommu->cap_ptr; + u32 range; + + iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); + + range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); + iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); +} + +static void __init init_iommu_from_acpi(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p, flags = 0; + u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; + u32 ext_flags = 0; + bool alias = 0; + struct ivhd_entry *e; + + /* + * First set the recommended feature enable bits from ACPI + * into the IOMMU control registers + */ + h->flags & IVHD_FLAG_HT_TUN_EN ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + h->flags & IVHD_FLAG_PASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + h->flags & IVHD_FLAG_RESPASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + h->flags & IVHD_FLAG_ISOC_EN ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + + /* + * Done. Now parse the device entries + */ + p += sizeof(struct ivhd_header); + end += h->length; + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + for (dev_i = iommu->first_device; + dev_i <= iommu->last_device; ++dev_i) + set_dev_entry_from_acpi(dev_i, e->flags, 0); + break; + case IVHD_DEV_SELECT: + devid = e->devid; + set_dev_entry_from_acpi(devid, e->flags, 0); + break; + case IVHD_DEV_SELECT_RANGE_START: + devid_start = e->devid; + flags = e->flags; + ext_flags = 0; + alias = 0; + break; + case IVHD_DEV_ALIAS: + devid = e->devid; + devid_to = e->ext >> 8; + set_dev_entry_from_acpi(devid, e->flags, 0); + amd_iommu_alias_table[devid] = devid_to; + break; + case IVHD_DEV_ALIAS_RANGE: + devid_start = e->devid; + flags = e->flags; + devid_to = e->ext >> 8; + ext_flags = 0; + alias = 1; + break; + case IVHD_DEV_EXT_SELECT: + devid = e->devid; + set_dev_entry_from_acpi(devid, e->flags, e->ext); + break; + case IVHD_DEV_EXT_SELECT_RANGE: + devid_start = e->devid; + flags = e->flags; + ext_flags = e->ext; + alias = 0; + break; + case IVHD_DEV_RANGE_END: + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) { + if (alias) + amd_iommu_alias_table[dev_i] = devid_to; + set_dev_entry_from_acpi( + amd_iommu_alias_table[dev_i], + flags, ext_flags); + } + break; + default: + break; + } + + p += 0x04 << (e->type >> 6); + } +} + +static int __init init_iommu_devices(struct amd_iommu *iommu) +{ + u16 i; + + for (i = iommu->first_device; i <= iommu->last_device; ++i) + set_iommu_for_device(iommu, i); + + return 0; +} + +static void __init free_iommu_one(struct amd_iommu *iommu) +{ + free_command_buffer(iommu); + iommu_unmap_mmio_space(iommu); +} + +static void __init free_iommu_all(void) +{ + struct amd_iommu *iommu, *next; + + list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { + list_del(&iommu->list); + free_iommu_one(iommu); + kfree(iommu); + } +} + +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +{ + spin_lock_init(&iommu->lock); + list_add_tail(&iommu->list, &amd_iommu_list); + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = h->devid; + iommu->cap_ptr = h->cap_ptr; + iommu->mmio_phys = h->mmio_phys; + iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); + if (!iommu->mmio_base) + return -ENOMEM; + + iommu_set_device_table(iommu); + iommu->cmd_buf = alloc_command_buffer(iommu); + if (!iommu->cmd_buf) + return -ENOMEM; + + init_iommu_from_pci(iommu); + init_iommu_from_acpi(iommu, h); + init_iommu_devices(iommu); + + return 0; +} + +static int __init init_iommu_all(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + struct amd_iommu *iommu; + int ret; + + INIT_LIST_HEAD(&amd_iommu_list); + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + switch (*p) { + case ACPI_IVHD_TYPE: + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); + if (iommu == NULL) + return -ENOMEM; + ret = init_iommu_one(iommu, h); + if (ret) + return ret; + break; + default: + break; + } + p += h->length; + + } + WARN_ON(p != end); + + return 0; +} + +static void __init free_unity_maps(void) +{ + struct unity_map_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { + list_del(&entry->list); + kfree(entry); + } +} + +static int __init init_exclusion_range(struct ivmd_header *m) +{ + int i; + + switch (m->type) { + case ACPI_IVMD_TYPE: + set_device_exclusion_range(m->devid, m); + break; + case ACPI_IVMD_TYPE_ALL: + for (i = 0; i < amd_iommu_last_bdf; ++i) + set_device_exclusion_range(i, m); + break; + case ACPI_IVMD_TYPE_RANGE: + for (i = m->devid; i <= m->aux; ++i) + set_device_exclusion_range(i, m); + break; + default: + break; + } + + return 0; +} + +static int __init init_unity_map_range(struct ivmd_header *m) +{ + struct unity_map_entry *e = 0; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + switch (m->type) { + default: + case ACPI_IVMD_TYPE: + e->devid_start = e->devid_end = m->devid; + break; + case ACPI_IVMD_TYPE_ALL: + e->devid_start = 0; + e->devid_end = amd_iommu_last_bdf; + break; + case ACPI_IVMD_TYPE_RANGE: + e->devid_start = m->devid; + e->devid_end = m->aux; + break; + } + e->address_start = PAGE_ALIGN(m->range_start); + e->address_end = e->address_start + PAGE_ALIGN(m->range_length); + e->prot = m->flags >> 1; + + list_add_tail(&e->list, &amd_iommu_unity_map); + + return 0; +} + +static int __init init_memory_definitions(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivmd_header *m; + + INIT_LIST_HEAD(&amd_iommu_unity_map); + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + m = (struct ivmd_header *)p; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + else if (m->flags & IVMD_FLAG_UNITY_MAP) + init_unity_map_range(m); + + p += m->length; + } + + return 0; +} + +static void __init enable_iommus(void) +{ + struct amd_iommu *iommu; + + list_for_each_entry(iommu, &amd_iommu_list, list) { + iommu_set_exclusion_range(iommu); + iommu_enable(iommu); + } +} + +/* + * Suspend/Resume support + * disable suspend until real resume implemented + */ + +static int amd_iommu_resume(struct sys_device *dev) +{ + return 0; +} + +static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) +{ + return -EINVAL; +} + +static struct sysdev_class amd_iommu_sysdev_class = { + .name = "amd_iommu", + .suspend = amd_iommu_suspend, + .resume = amd_iommu_resume, +}; + +static struct sys_device device_amd_iommu = { + .id = 0, + .cls = &amd_iommu_sysdev_class, +}; + +int __init amd_iommu_init(void) +{ + int i, ret = 0; + + + if (no_iommu) { + printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); + return 0; + } + + if (!amd_iommu_detected) + return -ENODEV; + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func + * we need to handle. Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) + return -ENODEV; + + dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); + alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); + rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); + + ret = -ENOMEM; + + /* Device table - directly used by all IOMMUs */ + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(dev_table_size)); + if (amd_iommu_dev_table == NULL) + goto out; + + /* + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the + * IOMMU see for that device + */ + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(alias_table_size)); + if (amd_iommu_alias_table == NULL) + goto free; + + /* IOMMU rlookup table - find the IOMMU for a specific device */ + amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(rlookup_table_size)); + if (amd_iommu_rlookup_table == NULL) + goto free; + + /* + * Protection Domain table - maps devices to protection domains + * This table has the same size as the rlookup_table + */ + amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(rlookup_table_size)); + if (amd_iommu_pd_table == NULL) + goto free; + + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, + get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_pd_alloc_bitmap == NULL) + goto free; + + /* + * memory is allocated now; initialize the device table with all zeroes + * and let all alias entries point to itself + */ + memset(amd_iommu_dev_table, 0, dev_table_size); + for (i = 0; i < amd_iommu_last_bdf; ++i) + amd_iommu_alias_table[i] = i; + + memset(amd_iommu_pd_table, 0, rlookup_table_size); + memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); + + /* + * never allocate domain 0 because its used as the non-allocated and + * error value placeholder + */ + amd_iommu_pd_alloc_bitmap[0] = 1; + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = -ENODEV; + if (acpi_table_parse("IVRS", init_iommu_all) != 0) + goto free; + + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) + goto free; + + ret = amd_iommu_init_dma_ops(); + if (ret) + goto free; + + ret = sysdev_class_register(&amd_iommu_sysdev_class); + if (ret) + goto free; + + ret = sysdev_register(&device_amd_iommu); + if (ret) + goto free; + + enable_iommus(); + + printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n", + (1 << (amd_iommu_aperture_order-20))); + + printk(KERN_INFO "AMD IOMMU: device isolation "); + if (amd_iommu_isolate) + printk("enabled\n"); + else + printk("disabled\n"); + +out: + return ret; + +free: + if (amd_iommu_pd_alloc_bitmap) + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + + if (amd_iommu_pd_table) + free_pages((unsigned long)amd_iommu_pd_table, + get_order(rlookup_table_size)); + + if (amd_iommu_rlookup_table) + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); + + if (amd_iommu_alias_table) + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); + + if (amd_iommu_dev_table) + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + + free_iommu_all(); + + free_unity_maps(); + + goto out; +} + +static int __init early_amd_iommu_detect(struct acpi_table_header *table) +{ + return 0; +} + +void __init amd_iommu_detect(void) +{ + if (swiotlb || no_iommu || iommu_detected) + return; + + if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { + iommu_detected = 1; + amd_iommu_detected = 1; +#ifdef CONFIG_GART_IOMMU + gart_iommu_aperture_disabled = 1; + gart_iommu_aperture = 0; +#endif + } +} + +static int __init parse_amd_iommu_options(char *str) +{ + for (; *str; ++str) { + if (strcmp(str, "isolate") == 0) + amd_iommu_isolate = 1; + } + + return 1; +} + +static int __init parse_amd_iommu_size_options(char *str) +{ + for (; *str; ++str) { + if (strcmp(str, "32M") == 0) + amd_iommu_aperture_order = 25; + if (strcmp(str, "64M") == 0) + amd_iommu_aperture_order = 26; + if (strcmp(str, "128M") == 0) + amd_iommu_aperture_order = 27; + if (strcmp(str, "256M") == 0) + amd_iommu_aperture_order = 28; + if (strcmp(str, "512M") == 0) + amd_iommu_aperture_order = 29; + if (strcmp(str, "1G") == 0) + amd_iommu_aperture_order = 30; + } + + return 1; +} + +__setup("amd_iommu=", parse_amd_iommu_options); +__setup("amd_iommu_size=", parse_amd_iommu_size_options); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index e819362c7068..9f907806c1a5 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -328,7 +328,7 @@ void __init early_gart_iommu_check(void) E820_RAM)) { /* reserve it, so we can reuse it in second kernel */ printk(KERN_INFO "update e820 for GART\n"); - add_memory_region(aper_base, aper_size, E820_RESERVED); + e820_add_region(aper_base, aper_size, E820_RESERVED); update_e820(); } } @@ -407,7 +407,9 @@ void __init gart_iommu_hole_init(void) agp_aper_base == aper_base && agp_aper_order == aper_order) { /* the same between two setting from NB and agp */ - if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) { + if (!no_iommu && + max_pfn > MAX_DMA32_PFN && + !printed_gart_size_msg) { printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n"); printk(KERN_ERR "please increase GART size in your BIOS setup\n"); printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n"); @@ -448,7 +450,7 @@ out: /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ - } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || + } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || fallback_aper_force) { diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4b99b1bdeb6c..3e947208b9d9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -52,30 +52,41 @@ unsigned long mp_lapic_addr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - /* * Knob to control our willingness to enable the local APIC. * - * -1=force-disable, +1=force-enable + * +1=force-enable */ -static int enable_local_apic __initdata; +static int force_enable_local_apic; +int disable_apic; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; -/* Disable local APIC timer from the kernel commandline or via dmi quirk - or using CPU MSR check */ -int local_apic_timer_disabled; +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +static int local_apic_timer_disabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ int apic_verbosity; +int pic_mode; + +/* Have we found an MP table */ +int smp_found_config; + +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + static unsigned int calibration_result; static int lapic_next_event(unsigned long delta, @@ -545,7 +556,7 @@ void __init setup_boot_APIC_clock(void) lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; else printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=1!\n"); + " due to nmi_watchdog=%d!\n", nmi_watchdog); } /* Setup the lapic or request the broadcast */ @@ -1094,7 +1105,7 @@ static int __init detect_init_APIC(void) u32 h, l, features; /* Disabled by kernel option? */ - if (enable_local_apic < 0) + if (disable_apic) return -1; switch (boot_cpu_data.x86_vendor) { @@ -1117,7 +1128,7 @@ static int __init detect_init_APIC(void) * Over-ride BIOS and try to enable the local APIC only if * "lapic" specified. */ - if (enable_local_apic <= 0) { + if (!force_enable_local_apic) { printk(KERN_INFO "Local APIC disabled by BIOS -- " "you can enable it with \"lapic\"\n"); return -1; @@ -1154,9 +1165,6 @@ static int __init detect_init_APIC(void) if (l & MSR_IA32_APICBASE_ENABLE) mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED) - nmi_watchdog = NMI_LOCAL_APIC; - printk(KERN_INFO "Found and enabled local APIC!\n"); apic_pm_activate(); @@ -1195,36 +1203,6 @@ void __init init_apic_mappings(void) if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); -#ifdef CONFIG_X86_IO_APIC - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } - } else { -fake_ioapic_page: - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - } - } -#endif } /* @@ -1236,7 +1214,7 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { - if (enable_local_apic < 0) + if (disable_apic) clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); if (!smp_found_config && !cpu_has_apic) @@ -1265,10 +1243,14 @@ int __init APIC_init_uniprocessor(void) #ifdef CONFIG_CRASH_DUMP boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); #endif - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); setup_local_APIC(); +#ifdef CONFIG_X86_IO_APIC + if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) +#endif + localise_nmi_watchdog(); end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config) @@ -1351,13 +1333,13 @@ void __init smp_intr_init(void) * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } #endif @@ -1370,15 +1352,15 @@ void __init apic_intr_init(void) smp_intr_init(); #endif /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); /* thermal monitor LVT interrupt */ #ifdef CONFIG_X86_MCE_P4THERMAL - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif } @@ -1513,6 +1495,9 @@ void __cpuinit generic_processor_info(int apicid, int version) */ cpu = 0; + if (apicid > max_physical_apicid) + max_physical_apicid = apicid; + /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1520,7 +1505,7 @@ void __cpuinit generic_processor_info(int apicid, int version) * if (CPU_HOTPLUG_ENABLED || num_processors > 8) * - Ashok Raj <[email protected]> */ - if (num_processors > 8) { + if (max_physical_apicid >= 8) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (!APIC_XAPIC(version)) { @@ -1534,9 +1519,9 @@ void __cpuinit generic_processor_info(int apicid, int version) } #ifdef CONFIG_SMP /* are we being called early in kernel startup? */ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); cpu_to_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid; @@ -1703,14 +1688,14 @@ static void apic_pm_activate(void) { } */ static int __init parse_lapic(char *arg) { - enable_local_apic = 1; + force_enable_local_apic = 1; return 0; } early_param("lapic", parse_lapic); static int __init parse_nolapic(char *arg) { - enable_local_apic = -1; + disable_apic = 1; clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return 0; } @@ -1740,3 +1725,21 @@ static int __init apic_set_verbosity(char *str) } __setup("apic=", apic_set_verbosity); +static int __init lapic_insert_resource(void) +{ + if (!apic_phys) + return -1; + + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + + return 0; +} + +/* + * need call insert after e820_reserve_resources() + * that is using request_resource + */ +late_initcall(lapic_insert_resource); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0633cfd0dc29..1e3d32e27c14 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -43,7 +43,7 @@ #include <mach_ipi.h> #include <mach_apic.h> -int disable_apic_timer __cpuinitdata; +static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); */ int apic_verbosity; +/* Have we found an MP table */ +int smp_found_config; + static struct resource lapic_resource = { .name = "Local APIC", .flags = IORESOURCE_MEM | IORESOURCE_BUSY, @@ -87,9 +90,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version @@ -417,37 +417,13 @@ void __init setup_boot_APIC_clock(void) lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; else printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=1!\n"); + " due to nmi_watchdog=%d!\n", nmi_watchdog); setup_APIC_timer(); } -/* - * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the - * C1E flag only in the secondary CPU, so when we detect the wreckage - * we already have enabled the boot CPU local apic timer. Check, if - * disable_apic_timer is set and the DUMMY flag is cleared. If yes, - * set the DUMMY flag again and force the broadcast mode in the - * clockevents layer. - */ -static void __cpuinit check_boot_apic_timer_broadcast(void) -{ - if (!disable_apic_timer || - (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) - return; - - printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); - lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; - - local_irq_enable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - &boot_cpu_physical_apicid); - local_irq_disable(); -} - void __cpuinit setup_secondary_APIC_clock(void) { - check_boot_apic_timer_broadcast(); setup_APIC_timer(); } @@ -850,7 +826,6 @@ static void __cpuinit lapic_setup_esr(void) void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); - nmi_watchdog_default(); setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } @@ -875,7 +850,7 @@ static int __init detect_init_APIC(void) void __init early_init_lapic_mapping(void) { - unsigned long apic_phys; + unsigned long phys_addr; /* * If no local APIC can be found then go out @@ -884,11 +859,11 @@ void __init early_init_lapic_mapping(void) if (!smp_found_config) return; - apic_phys = mp_lapic_addr; + phys_addr = mp_lapic_addr; - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + set_fixmap_nocache(FIX_APIC_BASE, phys_addr); apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, apic_phys); + APIC_BASE, phys_addr); /* * Fetch the APIC ID of the BSP in case we have a @@ -942,7 +917,9 @@ int __init APIC_init_uniprocessor(void) verify_local_APIC(); - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + connect_bsp_APIC(); + + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); setup_local_APIC(); @@ -954,6 +931,8 @@ int __init APIC_init_uniprocessor(void) if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); + if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) + localise_nmi_watchdog(); end_local_APIC_setup(); if (smp_found_config && !skip_ioapic_setup && nr_ioapics) @@ -1021,6 +1000,14 @@ asmlinkage void smp_error_interrupt(void) irq_exit(); } +/** + * * connect_bsp_APIC - attach the APIC to the interrupt system + * */ +void __init connect_bsp_APIC(void) +{ + enable_apic_mode(); +} + void disconnect_bsp_APIC(int virt_wire_setup) { /* Go back to Virtual Wire compatibility mode */ @@ -1090,10 +1077,13 @@ void __cpuinit generic_processor_info(int apicid, int version) */ cpu = 0; } + if (apicid > max_physical_apicid) + max_physical_apicid = apicid; + /* are we being called early in kernel startup? */ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); cpu_to_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid; @@ -1269,7 +1259,7 @@ __cpuinit int apic_is_clustered_box(void) if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) return 0; - bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9290e29013..00e6d1370954 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -228,6 +228,7 @@ #include <linux/suspend.h> #include <linux/kthread.h> #include <linux/jiffies.h> +#include <linux/smp_lock.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -1149,7 +1150,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) as->event_tail = 0; } as->events[as->event_head] = event; - if ((!as->suser) || (!as->writer)) + if (!as->suser || !as->writer) continue; switch (event) { case APM_SYS_SUSPEND: @@ -1396,7 +1397,7 @@ static void apm_mainloop(void) static int check_apm_user(struct apm_user *as, const char *func) { - if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { + if (as == NULL || as->magic != APM_BIOS_MAGIC) { printk(KERN_ERR "apm: %s passed bad filp\n", func); return 1; } @@ -1459,18 +1460,19 @@ static unsigned int do_poll(struct file *fp, poll_table *wait) return 0; } -static int do_ioctl(struct inode *inode, struct file *filp, - u_int cmd, u_long arg) +static long do_ioctl(struct file *filp, u_int cmd, u_long arg) { struct apm_user *as; + int ret; as = filp->private_data; if (check_apm_user(as, "ioctl")) return -EIO; - if ((!as->suser) || (!as->writer)) + if (!as->suser || !as->writer) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: + lock_kernel(); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1479,8 +1481,10 @@ static int do_ioctl(struct inode *inode, struct file *filp, queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); + unlock_kernel(); break; case APM_IOC_SUSPEND: + lock_kernel(); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1488,16 +1492,17 @@ static int do_ioctl(struct inode *inode, struct file *filp, } else queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { - return suspend(1); + ret = suspend(1); } else { as->suspend_wait = 1; wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); - return as->suspend_result; + ret = as->suspend_result; } - break; + unlock_kernel(); + return ret; default: - return -EINVAL; + return -ENOTTY; } return 0; } @@ -1860,7 +1865,7 @@ static const struct file_operations apm_bios_fops = { .owner = THIS_MODULE, .read = do_read, .poll = do_poll, - .ioctl = do_ioctl, + .unlocked_ioctl = do_ioctl, .open = do_open, .release = do_release, }; diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 92588083950f..6649d09ad88f 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -111,7 +111,7 @@ void foo(void) OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); + OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f126c05d6170..bacf5deeec2d 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -34,7 +34,7 @@ int main(void) ENTRY(pid); BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry)) +#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry)) ENTRY(flags); ENTRY(addr_limit); ENTRY(preempt_count); @@ -61,8 +61,11 @@ int main(void) OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); + OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); + OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); + OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); + OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0c6f8190887..ee76eaad3001 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -6,11 +6,15 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_64) += common_64.o bugs_64.o obj-$(CONFIG_X86_32) += amd.o +obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o +obj-$(CONFIG_X86_64) += centaur_64.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o +obj-$(CONFIG_X86_64) += intel_64.o obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c2e1ce33c7cb..84a8220a6072 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -1,9 +1,7 @@ - /* * Routines to indentify additional cpu features that are scattered in * cpuid space. */ - #include <linux/cpu.h> #include <asm/pat.h> @@ -53,19 +51,20 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { + if (!cpu_has_pat) + pat_disable("PAT not supported by CPU."); + switch (c->x86_vendor) { - case X86_VENDOR_AMD: - if (c->x86 >= 0xf && c->x86 <= 0x11) - return; - break; case X86_VENDOR_INTEL: if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) return; break; + case X86_VENDOR_AMD: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_TRANSMETA: + return; } - pat_disable(cpu_has_pat ? - "PAT disabled. Not yet verified on this CPU type." : - "PAT not supported by CPU."); + pat_disable("PAT disabled. Not yet verified on this CPU type."); } #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 245866828294..81a07ca65d44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,43 +24,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -#ifdef CONFIG_X86_LOCAL_APIC -#define ENABLE_C1E_MASK 0x18000000 -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) -{ - u32 lo, hi; - u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) { - if (smp_processor_id() != boot_cpu_physical_apicid) - printk(KERN_INFO "AMD C1E detected late. " - " Force timer broadcast.\n"); - return 1; - } - break; - default: - /* err on the side of caution */ - return 1; - } - return 0; -} -#endif - int force_mwait __cpuinitdata; static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) @@ -297,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; } -#ifdef CONFIG_X86_LOCAL_APIC - if (amd_apic_timer_broken()) - local_apic_timer_disabled = 1; -#endif - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c new file mode 100644 index 000000000000..958526d6a74a --- /dev/null +++ b/arch/x86/kernel/cpu/amd_64.c @@ -0,0 +1,217 @@ +#include <linux/init.h> +#include <linux/mm.h> + +#include <asm/numa_64.h> +#include <asm/mmconfig.h> +#include <asm/cacheflush.h> + +#include <mach_apic.h> + +#include "cpu.h" + +int force_mwait __cpuinitdata; + +#ifdef CONFIG_NUMA +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits; +#ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node = 0; + unsigned apicid = hard_smp_processor_id(); +#endif + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; + +#ifdef CONFIG_NUMA + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; + +#endif +} + +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + early_init_amd_mc(c); + + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + unsigned level; + +#ifdef CONFIG_SMP + unsigned long value; + + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 0xf) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } +#endif + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_cpu_cap(c, 0*32+31); + + /* On C+ stepping K8 rep microcode works well for copy/memset */ + if (c->x86 == 0xf) { + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + + level = get_model_name(c); + if (!level) { + switch (c->x86) { + case 0xf: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } + display_cacheinfo(c); + + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); + + if (c->extended_cpuid_level >= 0x80000006 && + (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_K8); + + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); + } + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && + (tseg >> PMD_SHIFT) < + (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + set_memory_4k((unsigned long)__va(tseg), 1); + } +} + +static struct cpu_dev amd_cpu_dev __cpuinitdata = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + .c_early_init = early_init_amd, + .c_init = init_amd, +}; + +cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); + diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 170d2f5523b2..1b1c56bb338f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -59,8 +59,12 @@ static void __init check_fpu(void) return; } -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug.. + */ __asm__("fninit\n\t" "fldl %1\n\t" "fdivl %2\n\t" @@ -108,10 +112,15 @@ static void __init check_popad(void) "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " : "=&a" (res) : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); + : "ecx", "edi"); + /* + * If this fails, it means that any user program may lock the + * CPU hard. Too bad. + */ + if (res != 12345678) + printk("Buggy.\n"); + else + printk("OK.\n"); #endif } @@ -137,7 +146,8 @@ static void __init check_config(void) * i486+ only features! (WP works in supervisor mode and the * new "invlpg" and "bswap" instructions) */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) +#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \ + defined(CONFIG_X86_BSWAP) if (boot_cpu_data.x86 == 3) panic("Kernel requires i486+ for 'invlpg' and other features"); #endif @@ -170,6 +180,7 @@ void __init check_bugs(void) check_fpu(); check_hlt(); check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + init_utsname()->machine[1] = + '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); } diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 9a3ed0649d4e..9a3ed0649d4e 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c new file mode 100644 index 000000000000..13526fd5cce1 --- /dev/null +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -0,0 +1,43 @@ +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/cpufeature.h> +#include <asm/processor.h> + +#include "cpu.h" + +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x6 && c->x86_model >= 0xf) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + if (c->x86 == 0x6 && c->x86_model >= 0xf) { + c->x86_cache_alignment = c->x86_clflush_size * 2; + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +} + +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { + .c_vendor = "Centaur", + .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, + .c_init = init_centaur, +}; + +cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); + diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0463a946247..80ab20d4fa39 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -427,7 +427,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c new file mode 100644 index 000000000000..751850235291 --- /dev/null +++ b/arch/x86/kernel/cpu/common_64.c @@ -0,0 +1,676 @@ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/bootmem.h> +#include <linux/bitops.h> +#include <linux/module.h> +#include <linux/kgdb.h> +#include <linux/topology.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/smp.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <asm/processor.h> +#include <asm/i387.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/mtrr.h> +#include <asm/mce.h> +#include <asm/pat.h> +#include <asm/numa.h> +#ifdef CONFIG_X86_LOCAL_APIC +#include <asm/mpspec.h> +#include <asm/apic.h> +#include <mach_apic.h> +#endif +#include <asm/pda.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/desc.h> +#include <asm/atomic.h> +#include <asm/proto.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/genapic.h> + +#include "cpu.h" + +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +} + +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; + +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ + display_cacheinfo(c); +} + +static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", +}; +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + + if (c->extended_cpuid_level < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + return 1; +} + + +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, eax, ebx, ecx, edx; + + n = c->extended_cpuid_level; + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " + "D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; + } + + if (n >= 0x80000006) { + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + ecx = cpuid_ecx(0x80000006); + c->x86_cache_size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + c->x86_cache_size, ecx & 0xFF); + } + if (n >= 0x80000008) { + cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } +} + +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + + if (!cpu_has(c, X86_FEATURE_HT)) + return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(index_msb); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); + } +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } + +#endif +} + +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + this_cpu = cpu_devs[i]; + return; + } + } + } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +static void __init early_cpu_support_print(void) +{ + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } +} + +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); + +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start ; + cvdev < __x86cpuvendor_end ; + cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + +/* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + c->extended_cpuid_level = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + /* Get vendor name */ + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, + (unsigned int *)&c->x86_vendor_id[0], + (unsigned int *)&c->x86_vendor_id[8], + (unsigned int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + __u32 misc; + cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +#ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; +#endif + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if (xlvl >= 0x80000004) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + c->extended_cpuid_level = cpuid_eax(0x80000000); + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + if (c->x86_vendor != X86_VENDOR_UNKNOWN && + cpu_devs[c->x86_vendor]->c_early_init) + cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); + + /* early_param could clear that, but recall get it set again */ + if (disable_apic) + clear_cpu_cap(c, X86_FEATURE_APIC); +} + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +{ + int i; + + early_identify_cpu(c); + + init_scattered_cpuid_features(c); + + c->apicid = phys_pkg_id(0); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + detect_ht(c); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if (c != &boot_cpu_data) { + /* AND the already accumulated flags with these */ + for (i = 0; i < NCAPINTS; i++) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + /* Clear all flags overriden by options */ + for (i = 0; i < NCAPINTS; i++) + c->x86_capability[i] &= ~cleared_cpu_caps[i]; + +#ifdef CONFIG_X86_MCE + mcheck_init(c); +#endif + select_idle_routine(c); + +#ifdef CONFIG_NUMA + numa_add_cpu(smp_processor_id()); +#endif + +} + +void __cpuinit identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + mtrr_ap_init(); +} + +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) +{ + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(KERN_CONT " stepping %02x\n", c->x86_mask); + else + printk(KERN_CONT "\n"); +} + +static __init int setup_disablecpuid(char *arg) +{ + int bit; + if (get_option(&arg, &bit) && bit < NCAPINTS*32) + setup_clear_cpu_cap(bit); + else + return 0; + return 1; +} +__setup("clearcpuid=", setup_disablecpuid); + +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + +void pda_init(int cpu) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + } else { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", cpu); + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } + + pda->irqstackptr += IRQSTACKSIZE-64; +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] +__attribute__((section(".bss.page_aligned"))); + +extern asmlinkage void ignore_sysret(void); + +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +} + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init. + */ +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + + /* + * set up and load the per-CPU TSS + */ + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception stack %ld %d\n", + v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 783691b2a738..4d894e8565fe 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -1,3 +1,6 @@ +#ifndef ARCH_X86_CPU_H + +#define ARCH_X86_CPU_H struct cpu_model_info { int vendor; @@ -36,3 +39,5 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); + +#endif diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index f03e9153618e..965ea52767ac 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -26,9 +26,10 @@ #define NFORCE2_SAFE_DISTANCE 50 /* Delay in ms between FSB changes */ -//#define NFORCE2_DELAY 10 +/* #define NFORCE2_DELAY 10 */ -/* nforce2_chipset: +/* + * nforce2_chipset: * FSB is changed using the chipset */ static struct pci_dev *nforce2_chipset_dev; @@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev; /* fid: * multiplier * 10 */ -static int fid = 0; +static int fid; /* min_fsb, max_fsb: * minimum and maximum FSB (= FSB at boot time) */ -static int min_fsb = 0; -static int max_fsb = 0; +static int min_fsb; +static int max_fsb; MODULE_AUTHOR("Sebastian Witt <[email protected]>"); MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); @@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444); MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, - "Minimum FSB to use, if not defined: current FSB - 50"); + "Minimum FSB to use, if not defined: current FSB - 50"); #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) @@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb) /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); + 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); if (!nforce2_sub5) return 0; @@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb) fsb /= 1000000; /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); - if(bootfsb || !temp) + if (bootfsb || !temp) return fsb; - + /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); + pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); fsb = nforce2_calc_fsb(temp); return fsb; @@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb) } /* First write? Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); if (!temp) { pll = nforce2_calc_pll(tfsb); @@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb) tfsb--; /* Calculate the PLL reg. value */ - if ((pll = nforce2_calc_pll(tfsb)) == -1) + pll = nforce2_calc_pll(tfsb); + if (pll == -1) return -EINVAL; nforce2_write_pll(pll); @@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu) static int nforce2_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { -// unsigned long flags; +/* unsigned long flags; */ struct cpufreq_freqs freqs; unsigned int target_fsb; @@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); /* Disable IRQs */ - //local_irq_save(flags); + /* local_irq_save(flags); */ if (nforce2_set_fsb(target_fsb) < 0) printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", - target_fsb); + target_fsb); else dprintk("Changed FSB successfully to %d\n", - target_fsb); + target_fsb); /* Enable IRQs */ - //local_irq_restore(flags); + /* local_irq_restore(flags); */ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); @@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy) policy->max = (fsb_pol_max + 1) * fid * 100; cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); return 0; } @@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) /* Set maximum FSB to FSB at boot time */ max_fsb = nforce2_fsb_read(1); - if(!max_fsb) + if (!max_fsb) return -EIO; if (!min_fsb) diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c new file mode 100644 index 000000000000..fcb1cc9d75ca --- /dev/null +++ b/arch/x86/kernel/cpu/intel_64.c @@ -0,0 +1,103 @@ +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/topology.h> +#include <asm/numa_64.h> + +#include "cpu.h" + +static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +{ + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, t; + + if (c->cpuid_level < 4) + return 1; + + cpuid_count(4, 0, &eax, &t, &t, &t); + + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + +static void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + init_intel_cacheinfo(c); + if (c->cpuid_level > 9) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + } + + + if (cpu_has_bts) + ds_init_intel(c); + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + c->x86_max_cores = intel_num_cpu_cores(c); + + srat_detect_node(); +} + +static struct cpu_dev intel_cpu_dev __cpuinitdata = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + .c_early_init = early_init_intel, + .c_init = init_intel, +}; +cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); + diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 26d615dcb149..2c8afafa18e8 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ + { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index e633c9c2b764..f390c9f66351 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -9,23 +9,23 @@ #include <linux/interrupt.h> #include <linux/smp.h> -#include <asm/processor.h> +#include <asm/processor.h> #include <asm/system.h> #include <asm/msr.h> #include "mce.h" /* Machine Check Handler For AMD Athlon/Duron */ -static void k7_machine_check(struct pt_regs * regs, long error_code) +static void k7_machine_check(struct pt_regs *regs, long error_code) { - int recover=1; + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; int i; - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; + recover = 0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); @@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code) } if (recover&2) - panic ("CPU context corrupt"); + panic("CPU context corrupt"); if (recover&1) - panic ("Unable to continue"); - printk (KERN_EMERG "Attempting to continue.\n"); + panic("Unable to continue"); + printk(KERN_EMERG "Attempting to continue.\n"); mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); + wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } @@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) machine_check_vector = k7_machine_check; wmb(); - printk (KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr (MSR_IA32_MCG_CAP, l, h); + printk(KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; /* Clear status for MC index 0 separately, we don't touch CTL, * as some K7 Athlons cause spurious MCEs when its enabled. */ if (boot_cpu_data.x86 == 6) { - wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); + wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); i = 1; } else i = 0; - for (; i<nr_mce_banks; i++) { - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); + for (; i < nr_mce_banks; i++) { + wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); } - set_in_cr4 (X86_CR4_MCE); - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id()); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index e07e8c068ae0..501ca1cea27d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -31,7 +31,7 @@ #include <asm/idle.h> #define MISC_MCELOG_MINOR 227 -#define NR_BANKS 6 +#define NR_SYSFS_BANKS 6 atomic_t mce_entry; @@ -46,7 +46,7 @@ static int mce_dont_init; */ static int tolerant = 1; static int banks; -static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; +static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; @@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) barrier(); for (i = 0; i < banks; i++) { - if (!bank[i]) + if (i < NR_SYSFS_BANKS && !bank[i]) continue; m.misc = 0; @@ -444,9 +444,10 @@ static void mce_init(void *dummy) rdmsrl(MSR_IA32_MCG_CAP, cap); banks = cap & 0xff; - if (banks > NR_BANKS) { - printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); - banks = NR_BANKS; + if (banks > MCE_EXTENDED_BANK) { + banks = MCE_EXTENDED_BANK; + printk(KERN_INFO "MCE: warning: using only %d banks\n", + MCE_EXTENDED_BANK); } /* Use accurate RIP reporting if available. */ if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) @@ -462,7 +463,11 @@ static void mce_init(void *dummy) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + if (i < NR_SYSFS_BANKS) + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + else + wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } } @@ -766,7 +771,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce); } \ static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); -/* TBD should generate these dynamically based on number of available banks */ +/* + * TBD should generate these dynamically based on number of available banks. + * Have only 6 contol banks in /sysfs until then. + */ ACCESSOR(bank0ctl,bank[0],mce_restart()) ACCESSOR(bank1ctl,bank[1],mce_restart()) ACCESSOR(bank2ctl,bank[2],mce_restart()) diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index cb03345554a5..eef001ad3bde 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -8,7 +8,7 @@ #include <linux/interrupt.h> #include <linux/smp.h> -#include <asm/processor.h> +#include <asm/processor.h> #include <asm/system.h> #include <asm/msr.h> #include <asm/apic.h> @@ -32,12 +32,12 @@ struct intel_mce_extended_msrs { /* u32 *reserved[]; */ }; -static int mce_num_extended_msrs = 0; +static int mce_num_extended_msrs; #ifdef CONFIG_X86_MCE_P4THERMAL static void unexpected_thermal_interrupt(struct pt_regs *regs) -{ +{ printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", smp_processor_id()); add_taint(TAINT_MACHINE_CHECK); @@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) * be some SMM goo which handles it, so we can't even put a handler * since it might be delivered via SMI already -zwanem. */ - rdmsr (MSR_IA32_MISC_ENABLE, l, h); + rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); if ((l & (1<<3)) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", @@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) return; /* -EBUSY */ } - /* check whether a vector already exists, temporarily masked? */ + /* check whether a vector already exists, temporarily masked? */ if (h & APIC_VECTOR_MASK) { printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " "installed\n", @@ -104,18 +104,18 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ apic_write_around(APIC_LVTTHMR, h); - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); /* ok we're good to go... */ vendor_thermal_interrupt = intel_thermal_interrupt; - - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - l = apic_read (APIC_LVTTHMR); - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read(APIC_LVTTHMR); + apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); @@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) { u32 h; - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); + rdmsr(MSR_IA32_MCG_EAX, r->eax, h); + rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); + rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); + rdmsr(MSR_IA32_MCG_EDX, r->edx, h); + rdmsr(MSR_IA32_MCG_ESI, r->esi, h); + rdmsr(MSR_IA32_MCG_EDI, r->edi, h); + rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); + rdmsr(MSR_IA32_MCG_ESP, r->esp, h); + rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); + rdmsr(MSR_IA32_MCG_EIP, r->eip, h); } -static void intel_machine_check(struct pt_regs * regs, long error_code) +static void intel_machine_check(struct pt_regs *regs, long error_code) { - int recover=1; + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; int i; - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; + recover = 0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); @@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) } if (recover & 2) - panic ("CPU context corrupt"); + panic("CPU context corrupt"); if (recover & 1) - panic ("Unable to continue"); + panic("Unable to continue"); printk(KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not + /* + * Do not clear the MSR_IA32_MCi_STATUS if the error is not * recoverable/continuable.This will allow BIOS to look at the MSRs * for errors if the OS could not log the error. */ - for (i=0; i<nr_mce_banks; i++) { + for (i = 0; i < nr_mce_banks; i++) { u32 msr; msr = MSR_IA32_MC0_STATUS+i*4; - rdmsr (msr, low, high); + rdmsr(msr, low, high); if (high&(1<<31)) { /* Clear it */ wrmsr(msr, 0UL, 0UL); @@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) } } mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); + wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } @@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; - + machine_check_vector = intel_machine_check; wmb(); - printk (KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr (MSR_IA32_MCG_CAP, l, h); + printk(KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - for (i=0; i<nr_mce_banks; i++) { - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); + for (i = 0; i < nr_mce_banks; i++) { + wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); } - set_in_cr4 (X86_CR4_MCE); - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id()); /* Check for P4/Xeon extended MCE MSRs */ - rdmsr (MSR_IA32_MCG_CAP, l, h); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<9)) {/* MCG_EXT_P */ mce_num_extended_msrs = (l >> 16) & 0xff; - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" + printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" " available\n", smp_processor_id(), mce_num_extended_msrs); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 5d241ce94a44..509bd3d9eacd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = { static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; -static u64 tom2; +u64 mtrr_tom2; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." @@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) } } - if (tom2) { - if (start >= (1ULL<<32) && (end < tom2)) + if (mtrr_tom2) { + if (start >= (1ULL<<32) && (end < mtrr_tom2)) return MTRR_TYPE_WRBACK; } @@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); } +/* fill the MSR pair relating to a var range */ +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) +{ + struct mtrr_var_range *vr; + + vr = mtrr_state.var_ranges; + + vr[index].base_lo = base_lo; + vr[index].base_hi = base_hi; + vr[index].mask_lo = mask_lo; + vr[index].mask_hi = mask_hi; +} + static void get_fixed_ranges(mtrr_type * frs) { @@ -213,13 +227,13 @@ void __init get_mtrr_state(void) mtrr_state.enabled = (lo & 0xc00) >> 10; if (amd_special_default_mtrr()) { - unsigned lo, hi; + unsigned low, high; /* TOP_MEM2 */ - rdmsr(MSR_K8_TOP_MEM2, lo, hi); - tom2 = hi; - tom2 <<= 32; - tom2 |= lo; - tom2 &= 0xffffff8000000ULL; + rdmsr(MSR_K8_TOP_MEM2, low, high); + mtrr_tom2 = high; + mtrr_tom2 <<= 32; + mtrr_tom2 |= low; + mtrr_tom2 &= 0xffffff800000ULL; } if (mtrr_show) { int high_width; @@ -251,9 +265,9 @@ void __init get_mtrr_state(void) else printk(KERN_INFO "MTRR %u disabled\n", i); } - if (tom2) { + if (mtrr_tom2) { printk(KERN_INFO "TOM2: %016llx aka %lldM\n", - tom2, tom2>>20); + mtrr_tom2, mtrr_tom2>>20); } } mtrr_state_set = 1; @@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) if (lo != msrwords[0] || hi != msrwords[1]) { if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 15 && + (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) && ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6a1e278d9323..105afe12beb0 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -37,6 +37,7 @@ #include <linux/smp.h> #include <linux/cpu.h> #include <linux/mutex.h> +#include <linux/sort.h> #include <asm/e820.h> #include <asm/mtrr.h> @@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; +/* should be related to MTRR_VAR_RANGES nums */ +#define RANGE_NUM 256 + +struct res_range { + unsigned long start; + unsigned long end; +}; + +static int __init +add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + /* out of slots */ + if (nr_range >= RANGE_NUM) + return nr_range; + + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; +} + +static int __init +add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + int i; + + /* try to merge it with old one */ + for (i = 0; i < nr_range; i++) { + unsigned long final_start, final_end; + unsigned long common_start, common_end; + + if (!range[i].end) + continue; + + common_start = max(range[i].start, start); + common_end = min(range[i].end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(range[i].start, start); + final_end = max(range[i].end, end); + + range[i].start = final_start; + range[i].end = final_end; + return nr_range; + } + + /* need to add that */ + return add_range(range, nr_range, start, end); +} + +static void __init +subtract_range(struct res_range *range, unsigned long start, unsigned long end) +{ + int i, j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && + range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && + range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static int __init cmp_range(const void *x1, const void *x2) +{ + const struct res_range *r1 = x1; + const struct res_range *r2 = x2; + long start1, start2; + + start1 = r1->start; + start2 = r2->start; + + return start1 - start2; +} + +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static int __initdata debug_print; + +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_WRBACK) + continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; + nr_range = add_range_with_merge(range, nr_range, base, + base + size - 1); + } + if (debug_print) { + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_UNCACHABLE) + continue; + size = range_state[i].size_pfn; + if (!size) + continue; + base = range_state[i].base_pfn; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, + extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + if (debug_print) { + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + if (debug_print) { + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* clear those is not used */ + for (i = nr_range; i < RANGE_NUM; i++) + memset(&range[i], 0, sizeof(range[i])); + + return nr_range; +} + +static struct res_range __initdata range[RANGE_NUM]; + +#ifdef CONFIG_MTRR_SANITIZER + +static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +{ + unsigned long sum; + int i; + + sum = 0; + for (i = 0; i < nr_range; i++) + sum += range[i].end + 1 - range[i].start; + + return sum; +} + +static int enable_mtrr_cleanup __initdata = + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + +struct var_mtrr_state { + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; +}; + +static void __init +set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned int address_bits) +{ + u32 base_lo, base_hi, mask_lo, mask_hi; + u64 base, mask; + + if (!sizek) { + fill_mtrr_var_range(reg, 0, 0, 0, 0); + return; + } + + mask = (1ULL << address_bits) - 1; + mask &= ~((((u64)sizek) << 10) - 1); + + base = ((u64)basek) << 10; + + base |= type; + mask |= 0x800; + + base_lo = base & ((1ULL<<32) - 1); + base_hi = base >> 32; + + mask_lo = mask & ((1ULL<<32) - 1); + mask_hi = mask >> 32; + + fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); +} + +static void __init +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type) +{ + range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); + range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); + range_state[reg].type = type; +} + +static void __init +set_var_mtrr_all(unsigned int address_bits) +{ + unsigned long basek, sizek; + unsigned char type; + unsigned int reg; + + for (reg = 0; reg < num_var_ranges; reg++) { + basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); + sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); + type = range_state[reg].type; + + set_var_mtrr(reg, basek, sizek, type, address_bits); + } +} + +static unsigned int __init +range_to_mtrr(unsigned int reg, unsigned long range_startk, + unsigned long range_sizek, unsigned char type) +{ + if (!range_sizek || (reg >= num_var_ranges)) + return reg; + + while (range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + + /* Compute the maximum size I can make a range */ + if (range_startk) + max_align = ffs(range_startk) - 1; + else + max_align = 32; + align = fls(range_sizek) - 1; + if (align > max_align) + align = max_align; + + sizek = 1 << align; + if (debug_print) + printk(KERN_DEBUG "Setting variable MTRR %d, " + "base: %ldMB, range: %ldMB, type %s\n", + reg, range_startk >> 10, sizek >> 10, + (type == MTRR_TYPE_UNCACHABLE)?"UC": + ((type == MTRR_TYPE_WRBACK)?"WB":"Other") + ); + save_var_mtrr(reg++, range_startk, sizek, type); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= num_var_ranges) + break; + } + return reg; +} + +static unsigned __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, + unsigned long sizek) +{ + unsigned long hole_basek, hole_sizek; + unsigned long second_basek, second_sizek; + unsigned long range0_basek, range0_sizek; + unsigned long range_basek, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + + hole_basek = 0; + hole_sizek = 0; + second_basek = 0; + second_sizek = 0; + chunk_sizek = state->chunk_sizek; + gran_sizek = state->gran_sizek; + + /* align with gran size, prevent small block used up MTRRs */ + range_basek = ALIGN(state->range_startk, gran_sizek); + if ((range_basek > basek) && basek) + return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); + range_sizek = ALIGN(state->range_sizek, gran_sizek); + + while (range_sizek > state->range_sizek) { + range_sizek -= gran_sizek; + if (!range_sizek) + return 0; + } + state->range_sizek = range_sizek; + + /* try to append some small hole */ + range0_basek = state->range_startk; + range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + if (range0_sizek == state->range_sizek) { + if (debug_print) + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK); + return 0; + } + + range0_sizek -= chunk_sizek; + if (range0_sizek && sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } + } + + if (range0_sizek) { + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); + + } + + range_basek = range0_basek + range0_sizek; + range_sizek = chunk_sizek; + + if (range_basek + range_sizek > basek && + range_basek + range_sizek <= (basek + sizek)) { + /* one hole */ + second_basek = basek; + second_sizek = range_basek + range_sizek - basek; + } + + /* if last piece, only could one hole near end */ + if ((second_basek || !basek) && + range_sizek - (state->range_sizek - range0_sizek) - second_sizek < + (chunk_sizek >> 1)) { + /* + * one hole in middle (second_sizek is 0) or at end + * (second_sizek is 0 ) + */ + hole_sizek = range_sizek - (state->range_sizek - range0_sizek) + - second_sizek; + hole_basek = range_basek + range_sizek - hole_sizek + - second_sizek; + } else { + /* fallback for big hole, or several holes */ + range_sizek = state->range_sizek - range0_sizek; + second_basek = 0; + second_sizek = 0; + } + + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + MTRR_TYPE_WRBACK); + if (hole_sizek) { + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, + MTRR_TYPE_UNCACHABLE); + + } + + return second_sizek; +} + +static void __init +set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, + unsigned long size_pfn) +{ + unsigned long basek, sizek; + unsigned long second_sizek = 0; + + if (state->reg >= num_var_ranges) + return; + + basek = base_pfn << (PAGE_SHIFT - 10); + sizek = size_pfn << (PAGE_SHIFT - 10); + + /* See if I can merge with the last range */ + if ((basek <= 1024) || + (state->range_startk + state->range_sizek == basek)) { + unsigned long endk = basek + sizek; + state->range_sizek = endk - state->range_startk; + return; + } + /* Write the range mtrrs */ + if (state->range_sizek != 0) + second_sizek = range_to_mtrr_with_hole(state, basek, sizek); + + /* Allocate an msr */ + state->range_startk = basek + second_sizek; + state->range_sizek = sizek - second_sizek; +} + +/* mininum size of mtrr block that can take hole */ +static u64 mtrr_chunk_size __initdata = (256ULL<<20); + +static int __init parse_mtrr_chunk_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_chunk_size = memparse(p, &p); + return 0; +} +early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); + +/* granity of mtrr of block */ +static u64 mtrr_gran_size __initdata; + +static int __init parse_mtrr_gran_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_gran_size = memparse(p, &p); + return 0; +} +early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); + +static int nr_mtrr_spare_reg __initdata = + CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; + +static int __init parse_mtrr_spare_reg(char *arg) +{ + if (arg) + nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); + +static int __init +x86_setup_var_mtrrs(struct res_range *range, int nr_range, + u64 chunk_size, u64 gran_size) +{ + struct var_mtrr_state var_state; + int i; + int num_reg; + + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.chunk_sizek = chunk_size >> 10; + var_state.gran_sizek = gran_size >> 10; + + memset(range_state, 0, sizeof(range_state)); + + /* Write the range etc */ + for (i = 0; i < nr_range; i++) + set_var_mtrr_range(&var_state, range[i].start, + range[i].end - range[i].start + 1); + + /* Write the last range */ + if (var_state.range_sizek != 0) + range_to_mtrr_with_hole(&var_state, 0, 0); + + num_reg = var_state.reg; + /* Clear out the extra MTRR's */ + while (var_state.reg < num_var_ranges) { + save_var_mtrr(var_state.reg, 0, 0, 0); + var_state.reg++; + } + + return num_reg; +} + +struct mtrr_cleanup_result { + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; +}; + +/* + * gran_size: 1M, 2M, ..., 2G + * chunk size: gran_size, ..., 4G + * so we need (2+13)*6 + */ +#define NUM_RESULT 90 +#define PSHIFT (PAGE_SHIFT - 10) + +static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; +static struct res_range __initdata range_new[RANGE_NUM]; +static unsigned long __initdata min_loss_pfn[RANGE_NUM]; + +static int __init mtrr_cleanup(unsigned address_bits) +{ + unsigned long extra_remove_base, extra_remove_size; + unsigned long i, base, size, def, dummy; + mtrr_type type; + int nr_range, nr_range_new; + u64 chunk_size, gran_size; + unsigned long range_sums, range_sums_new; + int index_good; + int num_reg_good; + + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; + + if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + size = range_state[i].size_pfn; + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* check if we got UC entries */ + if (!num[MTRR_TYPE_UNCACHABLE]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + extra_remove_size = 0; + if (mtrr_tom2) { + extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_size = + (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; + } + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, + extra_remove_size); + range_sums = sum_ranges(range, nr_range); + printk(KERN_INFO "total RAM coverred: %ldM\n", + range_sums >> (20 - PAGE_SHIFT)); + + if (mtrr_chunk_size && mtrr_gran_size) { + int num_reg; + + debug_print = 1; + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, + mtrr_gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, + extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + i = 0; + result[i].chunk_sizek = mtrr_chunk_size >> 10; + result[i].gran_sizek = mtrr_gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + if (!result[i].bad) { + set_var_mtrr_all(address_bits); + return 1; + } + printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " + "will find optimal one\n"); + debug_print = 0; + memset(result, 0, sizeof(result[0])); + } + + i = 0; + memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); + memset(result, 0, sizeof(result)); + for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (chunk_size = gran_size; chunk_size < (1ULL<<33); + chunk_size <<= 1) { + int num_reg; + + if (debug_print) + printk(KERN_INFO + "\ngran_size: %lldM chunk_size_size: %lldM\n", + gran_size >> 20, chunk_size >> 20); + if (i >= NUM_RESULT) + continue; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, + chunk_size, gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + result[i].chunk_sizek = chunk_size >> 10; + result[i].gran_sizek = gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + /* double check it */ + if (!result[i].bad && !result[i].lose_cover_sizek) { + if (nr_range_new != nr_range || + memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; + } + + if (!result[i].bad && (range_sums - range_sums_new < + min_loss_pfn[num_reg])) { + min_loss_pfn[num_reg] = + range_sums - range_sums_new; + } + i++; + } + } + + /* print out all */ + for (i = 0; i < NUM_RESULT; i++) { + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + } + + /* try to find the optimal index */ + if (nr_mtrr_spare_reg >= num_var_ranges) + nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; + for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { + if (!min_loss_pfn[i]) { + num_reg_good = i; + break; + } + } + + index_good = -1; + if (num_reg_good != -1) { + for (i = 0; i < NUM_RESULT; i++) { + if (!result[i].bad && + result[i].num_reg == num_reg_good && + !result[i].lose_cover_sizek) { + index_good = i; + break; + } + } + } + + if (index_good != -1) { + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + i = index_good; + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", + result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", + result[i].num_reg, + result[i].lose_cover_sizek >> 10); + /* convert ranges to var ranges state */ + chunk_size = result[i].chunk_sizek; + chunk_size <<= 10; + gran_size = result[i].gran_sizek; + gran_size <<= 10; + debug_print = 1; + x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + set_var_mtrr_all(address_bits); + return 1; + } + + printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); + printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + + return 0; +} +#else +static int __init mtrr_cleanup(unsigned address_bits) +{ + return 0; +} +#endif + +static int __initdata changed_by_mtrr_cleanup; + static int disable_mtrr_trim; static int __init disable_mtrr_trim_setup(char *str) @@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void) return 0; } +static u64 __init real_trim_memory(unsigned long start_pfn, + unsigned long limit_pfn) +{ + u64 trim_start, trim_size; + trim_start = start_pfn; + trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + + return e820_update_range(trim_start, trim_size, E820_RAM, + E820_RESERVED); +} /** * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs * @end_pfn: ending page frame number @@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - u64 trim_start, trim_size; + int nr_range; + u64 total_trim_size; + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: @@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - if (amd_special_default_mtrr()) - return 0; + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } /* Find highest cached pfn */ for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; if (type != MTRR_TYPE_WRBACK) continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; if (highest_pfn < base + size) highest_pfn = base + size; } @@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) return 0; } - if (highest_pfn < end_pfn) { + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type >= MTRR_NUM_TYPES) + continue; + size = range_state[i].size_pfn; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* no entry for WB? */ + if (!num[MTRR_TYPE_WRBACK]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + nr_range = 0; + if (mtrr_tom2) { + range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); + range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; + if (highest_pfn < range[nr_range].end + 1) + highest_pfn = range[nr_range].end + 1; + nr_range++; + } + nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + + total_trim_size = 0; + /* check the head */ + if (range[0].start) + total_trim_size += real_trim_memory(0, range[0].start); + /* check the holes */ + for (i = 0; i < nr_range - 1; i++) { + if (range[i].end + 1 < range[i+1].start) + total_trim_size += real_trim_memory(range[i].end + 1, + range[i+1].start); + } + /* check the top */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) + total_trim_size += real_trim_memory(range[i].end + 1, + end_pfn); + + if (total_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %luMB of RAM.\n", - (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); + " all of memory, losing %lluMB of RAM.\n", + total_trim_size >> 20); - WARN_ON(1); + if (!changed_by_mtrr_cleanup) + WARN_ON(1); printk(KERN_INFO "update e820 for mtrr\n"); - trim_start = highest_pfn; - trim_start <<= PAGE_SHIFT; - trim_size = end_pfn; - trim_size <<= PAGE_SHIFT; - trim_size -= trim_start; - update_memory_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); update_e820(); + return 1; } @@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) */ void __init mtrr_bp_init(void) { + u32 phys_addr; init_ifs(); + phys_addr = 32; + if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; size_or_mask = 0xff000000; /* 36 bits */ size_and_mask = 0x00f00000; + phys_addr = 36; /* This is an AMD specific MSR, but we assume(hope?) that Intel will implement it to when they extend the address bus of the Xeon. */ if (cpuid_eax(0x80000000) >= 0x80000008) { - u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; /* CPUID workaround for Intel 0F33/0F34 CPU */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void) don't support PAE */ size_or_mask = 0xfff00000; /* 32 bits */ size_and_mask = 0; + phys_addr = 32; } } else { switch (boot_cpu_data.x86_vendor) { @@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - if (use_intel()) + if (use_intel()) { get_mtrr_state(); + + if (mtrr_cleanup(phys_addr)) { + changed_by_mtrr_cleanup = 1; + mtrr_if->set_all(); + } + + } } } @@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void) { if (!mtrr_if) return 0; - if (use_intel()) - mtrr_state_warn(); - else { + if (use_intel()) { + if (!changed_by_mtrr_cleanup) + mtrr_state_warn(); + } else { /* The CPUs haven't MTRR and seem to not support SMP. They have * specific drivers, we use a tricky method to support * suspend/resume for them. diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2cc77eb6fea3..2dc4ec656b23 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); extern void set_mtrr_ops(struct mtrr_ops * ops); @@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if; #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) extern unsigned int num_var_ranges; +extern u64 mtrr_tom2; void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f9ae93adffe5..2e9bef6e3aa3 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -1,11 +1,15 @@ -/* local apic based NMI watchdog for various CPUs. - This file also handles reservation of performance counters for coordination - with other users (like oprofile). - - Note that these events normally don't tick when the CPU idles. This means - the frequency varies with CPU load. - - Original code for K7/P6 written by Keith Owens */ +/* + * local apic based NMI watchdog for various CPUs. + * + * This file also handles reservation of performance counters for coordination + * with other users (like oprofile). + * + * Note that these events normally don't tick when the CPU idles. This means + * the frequency varies with CPU load. + * + * Original code for K7/P6 written by Keith Owens + * + */ #include <linux/percpu.h> #include <linux/module.h> @@ -36,12 +40,16 @@ struct wd_ops { static const struct wd_ops *wd_ops; -/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's - * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) +/* + * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's + * offset from MSR_P4_BSU_ESCR0. + * + * It will be the max for all platforms (for now) */ #define NMI_MAX_COUNTER_BITS 66 -/* perfctr_nmi_owner tracks the ownership of the perfctr registers: +/* + * perfctr_nmi_owner tracks the ownership of the perfctr registers: * evtsel_nmi_owner tracks the ownership of the event selection * - different performance counters/ event selection may be reserved for * different subsystems this reservation system just tries to coordinate @@ -73,8 +81,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) return 0; } -/* converts an msr to an appropriate reservation bit */ -/* returns the bit offset of the event selection register */ +/* + * converts an msr to an appropriate reservation bit + * returns the bit offset of the event selection register + */ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) { /* returns the bit offset of the event selection register */ @@ -114,6 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr) return (!test_bit(counter, perfctr_nmi_owner)); } +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); int reserve_perfctr_nmi(unsigned int msr) { @@ -128,6 +139,7 @@ int reserve_perfctr_nmi(unsigned int msr) return 1; return 0; } +EXPORT_SYMBOL(reserve_perfctr_nmi); void release_perfctr_nmi(unsigned int msr) { @@ -140,6 +152,7 @@ void release_perfctr_nmi(unsigned int msr) clear_bit(counter, perfctr_nmi_owner); } +EXPORT_SYMBOL(release_perfctr_nmi); int reserve_evntsel_nmi(unsigned int msr) { @@ -154,6 +167,7 @@ int reserve_evntsel_nmi(unsigned int msr) return 1; return 0; } +EXPORT_SYMBOL(reserve_evntsel_nmi); void release_evntsel_nmi(unsigned int msr) { @@ -166,11 +180,6 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } - -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); -EXPORT_SYMBOL(reserve_perfctr_nmi); -EXPORT_SYMBOL(release_perfctr_nmi); -EXPORT_SYMBOL(reserve_evntsel_nmi); EXPORT_SYMBOL(release_evntsel_nmi); void disable_lapic_nmi_watchdog(void) @@ -181,7 +190,9 @@ void disable_lapic_nmi_watchdog(void) return; on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); - wd_ops->unreserve(); + + if (wd_ops) + wd_ops->unreserve(); BUG_ON(atomic_read(&nmi_active) != 0); } @@ -232,8 +243,8 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz) return retval; } -static void -write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) +static void write_watchdog_counter(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) { u64 count = (u64)cpu_khz * 1000; @@ -244,7 +255,7 @@ write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi } static void write_watchdog_counter32(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) + const char *descr, unsigned nmi_hz) { u64 count = (u64)cpu_khz * 1000; @@ -254,9 +265,10 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, wrmsr(perfctr_msr, (u32)(-count), 0); } -/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface - nicely stable so there is not much variety */ - +/* + * AMD K7/K8/Family10h/Family11h support. + * AMD keeps this interface nicely stable so there is not much variety + */ #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) #define K7_EVNTSEL_OS (1 << 17) @@ -289,7 +301,7 @@ static int setup_k7_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused + wd->cccr_msr = 0; /* unused */ return 1; } @@ -325,18 +337,19 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) } static const struct wd_ops k7_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_k7_watchdog, - .rearm = single_msr_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_K7_PERFCTR0, - .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL<<47, + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_k7_watchdog, + .rearm = single_msr_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_K7_PERFCTR0, + .evntsel = MSR_K7_EVNTSEL0, + .checkbit = 1ULL << 47, }; -/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ - +/* + * Intel Model 6 (PPro+,P2,P3,P-M,Core1) + */ #define P6_EVNTSEL0_ENABLE (1 << 22) #define P6_EVNTSEL_INT (1 << 20) #define P6_EVNTSEL_OS (1 << 17) @@ -372,52 +385,58 @@ static int setup_p6_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused + wd->cccr_msr = 0; /* unused */ return 1; } static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { - /* P6 based Pentium M need to re-unmask + /* + * P6 based Pentium M need to re-unmask * the apic vector but it doesn't hurt * other P6 variant. - * ArchPerfom/Core Duo also needs this */ + * ArchPerfom/Core Duo also needs this + */ apic_write(APIC_LVTPC, APIC_DM_NMI); + /* P6/ARCH_PERFMON has 32 bit counter write */ write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); } static const struct wd_ops p6_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_p6_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_P6_PERFCTR0, - .evntsel = MSR_P6_EVNTSEL0, - .checkbit = 1ULL<<39, + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_p6_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_P6_PERFCTR0, + .evntsel = MSR_P6_EVNTSEL0, + .checkbit = 1ULL << 39, }; -/* Intel P4 performance counters. By far the most complicated of all. */ - -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) -#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) -#define P4_ESCR_OS (1<<3) -#define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI0 (1<<26) -#define P4_CCCR_OVF_PMI1 (1<<27) -#define P4_CCCR_THRESHOLD(N) ((N)<<20) -#define P4_CCCR_COMPLEMENT (1<<19) -#define P4_CCCR_COMPARE (1<<18) -#define P4_CCCR_REQUIRED (3<<16) -#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) -#define P4_CCCR_ENABLE (1<<12) -#define P4_CCCR_OVF (1<<31) - -/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - CRU_ESCR0 (with any non-null event selector) through a complemented - max threshold. [IA32-Vol3, Section 14.9.9] */ +/* + * Intel P4 performance counters. + * By far the most complicated of all. + */ +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) +#define P4_ESCR_EVENT_SELECT(N) ((N) << 25) +#define P4_ESCR_OS (1 << 3) +#define P4_ESCR_USR (1 << 2) +#define P4_CCCR_OVF_PMI0 (1 << 26) +#define P4_CCCR_OVF_PMI1 (1 << 27) +#define P4_CCCR_THRESHOLD(N) ((N) << 20) +#define P4_CCCR_COMPLEMENT (1 << 19) +#define P4_CCCR_COMPARE (1 << 18) +#define P4_CCCR_REQUIRED (3 << 16) +#define P4_CCCR_ESCR_SELECT(N) ((N) << 13) +#define P4_CCCR_ENABLE (1 << 12) +#define P4_CCCR_OVF (1 << 31) +/* + * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + * CRU_ESCR0 (with any non-null event selector) through a complemented + * max threshold. [IA32-Vol3, Section 14.9.9] + */ static int setup_p4_watchdog(unsigned nmi_hz) { unsigned int perfctr_msr, evntsel_msr, cccr_msr; @@ -442,7 +461,8 @@ static int setup_p4_watchdog(unsigned nmi_hz) #endif ht_num = 0; - /* performance counters are shared resources + /* + * performance counters are shared resources * assign each hyperthread its own set * (re-use the ESCR0 register, seems safe * and keeps the cccr_val the same) @@ -540,20 +560,21 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) } static const struct wd_ops p4_wd_ops = { - .reserve = p4_reserve, - .unreserve = p4_unreserve, - .setup = setup_p4_watchdog, - .rearm = p4_rearm, - .stop = stop_p4_watchdog, + .reserve = p4_reserve, + .unreserve = p4_unreserve, + .setup = setup_p4_watchdog, + .rearm = p4_rearm, + .stop = stop_p4_watchdog, /* RED-PEN this is wrong for the other sibling */ - .perfctr = MSR_P4_BPU_PERFCTR0, - .evntsel = MSR_P4_BSU_ESCR0, - .checkbit = 1ULL<<39, + .perfctr = MSR_P4_BPU_PERFCTR0, + .evntsel = MSR_P4_BSU_ESCR0, + .checkbit = 1ULL << 39, }; -/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully - all future Intel CPUs. */ - +/* + * Watchdog using the Intel architected PerfMon. + * Used for Core2 and hopefully all future Intel CPUs. + */ #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK @@ -599,19 +620,19 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused + wd->cccr_msr = 0; /* unused */ intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } static struct wd_ops intel_arch_wd_ops __read_mostly = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR1, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, }; static void probe_nmi_watchdog(void) @@ -624,8 +645,10 @@ static void probe_nmi_watchdog(void) wd_ops = &k7_wd_ops; break; case X86_VENDOR_INTEL: - /* Work around Core Duo (Yonah) errata AE49 where perfctr1 - doesn't have a working enable bit. */ + /* + * Work around Core Duo (Yonah) errata AE49 where perfctr1 + * doesn't have a working enable bit. + */ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; @@ -636,7 +659,7 @@ static void probe_nmi_watchdog(void) } switch (boot_cpu_data.x86) { case 6: - if (boot_cpu_data.x86_model > 0xd) + if (boot_cpu_data.x86_model > 13) return; wd_ops = &p6_wd_ops; @@ -697,10 +720,11 @@ int lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; + rdmsrl(wd->perfctr_msr, ctr); - if (ctr & wd_ops->checkbit) { /* perfctr still running? */ + if (ctr & wd_ops->checkbit) /* perfctr still running? */ return 0; - } + wd_ops->rearm(wd, nmi_hz); return 1; } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c new file mode 100644 index 000000000000..9836a079cfd9 --- /dev/null +++ b/arch/x86/kernel/e820.c @@ -0,0 +1,1378 @@ +/* + * Handle the memory map. + * The functions here do the job until bootmem takes over. + * + * Getting sanitize_e820_map() in sync with i386 version by applying change: + * - Provisions for empty E820 memory regions (reported by certain BIOSes). + * Alex Achenbach <[email protected]>, December 2002. + * Venkatesh Pallipadi <[email protected]> + * + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/ioport.h> +#include <linux/string.h> +#include <linux/kexec.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pfn.h> +#include <linux/suspend.h> +#include <linux/firmware-map.h> + +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/e820.h> +#include <asm/proto.h> +#include <asm/setup.h> +#include <asm/trampoline.h> + +/* + * The e820 map is the map that gets modified e.g. with command line parameters + * and that is also registered with modifications in the kernel resource tree + * with the iomem_resource as parent. + * + * The e820_saved is directly saved after the BIOS-provided memory map is + * copied. It doesn't get modified afterwards. It's registered for the + * /sys/firmware/memmap interface. + * + * That memory map is not modified and is used as base for kexec. The kexec'd + * kernel should get the same memory map as the firmware provides. Then the + * user can e.g. boot the original kernel with mem=1G while still booting the + * next kernel with full memory. + */ +struct e820map e820; +struct e820map e820_saved; + +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0xaeedbabe; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif + +/* + * This function checks if any part of the range <start,end> is mapped + * with type. + */ +int +e820_any_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(e820_any_mapped); + +/* + * This function checks if the entire range <start,end> is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of <start,end> we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* + * if start is now at or beyond end, we're done, full + * coverage + */ + if (start >= end) + return 1; + } + return 0; +} + +/* + * Add a memory region to the kernel e820 map. + */ +void __init e820_add_region(u64 start, u64 size, int type) +{ + int x = e820.nr_map; + + if (x == ARRAY_SIZE(e820.map)) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820.map[x].addr = start; + e820.map[x].size = size; + e820.map[x].type = type; + e820.nr_map++; +} + +void __init e820_print_map(char *who) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + printk(KERN_INFO " %s: %016Lx - %016Lx ", who, + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); + switch (e820.map[i].type) { + case E820_RAM: + case E820_RESERVED_KERN: + printk(KERN_CONT "(usable)\n"); + break; + case E820_RESERVED: + printk(KERN_CONT "(reserved)\n"); + break; + case E820_ACPI: + printk(KERN_CONT "(ACPI data)\n"); + break; + case E820_NVS: + printk(KERN_CONT "(ACPI NVS)\n"); + break; + default: + printk(KERN_CONT "type %u\n", e820.map[i].type); + break; + } + } +} + +/* + * Sanitize the BIOS e820 map. + * + * Some e820 responses include overlapping entries. The following + * replaces the original e820 map with a new one, removing overlaps, + * and resolving conflicting memory types in favor of highest + * numbered type. + * + * The input parameter biosmap points to an array of 'struct + * e820entry' which on entry has elements in the range [0, *pnr_map) + * valid, and which has space for up to max_nr_map entries. + * On return, the resulting sanitized e820 map entries will be in + * overwritten in the same location, starting at biosmap. + * + * The integer pointed to by pnr_map must be valid on entry (the + * current number of valid entries located at biosmap) and will + * be updated on return, with the new number of valid entries + * (something no more than max_nr_map.) + * + * The return value from sanitize_e820_map() is zero if it + * successfully 'sanitized' the map entries passed in, and is -1 + * if it did nothing, which can happen if either of (1) it was + * only passed one map entry, or (2) any of the input map entries + * were invalid (start + size < start, meaning that the size was + * so big the described memory range wrapped around through zero.) + * + * Visually we're performing the following + * (1,2,3,4 = memory types)... + * + * Sample memory map (w/overlaps): + * ____22__________________ + * ______________________4_ + * ____1111________________ + * _44_____________________ + * 11111111________________ + * ____________________33__ + * ___________44___________ + * __________33333_________ + * ______________22________ + * ___________________2222_ + * _________111111111______ + * _____________________11_ + * _________________4______ + * + * Sanitized equivalent (no overlap): + * 1_______________________ + * _44_____________________ + * ___1____________________ + * ____22__________________ + * ______11________________ + * _________1______________ + * __________3_____________ + * ___________44___________ + * _____________33_________ + * _______________2________ + * ________________1_______ + * _________________4______ + * ___________________2____ + * ____________________33__ + * ______________________4_ + */ + +int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, + int *pnr_map) +{ + struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ + unsigned long long addr; /* address for this change point */ + }; + static struct change_member change_point_list[2*E820_X_MAX] __initdata; + static struct change_member *change_point[2*E820_X_MAX] __initdata; + static struct e820entry *overlap_list[E820_X_MAX] __initdata; + static struct e820entry new_bios[E820_X_MAX] __initdata; + struct change_member *change_tmp; + unsigned long current_type, last_type; + unsigned long long last_addr; + int chgidx, still_changing; + int overlap_entries; + int new_bios_entry; + int old_nr, new_nr, chg_nr; + int i; + + /* if there's only one memory region, don't bother */ + if (*pnr_map < 2) + return -1; + + old_nr = *pnr_map; + BUG_ON(old_nr > max_nr_map); + + /* bail out if we find any unreasonable addresses in bios map */ + for (i = 0; i < old_nr; i++) + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) + return -1; + + /* create pointers for initial change-point information (for sorting) */ + for (i = 0; i < 2 * old_nr; i++) + change_point[i] = &change_point_list[i]; + + /* record all known change-points (starting and ending addresses), + omitting those that are for empty memory regions */ + chgidx = 0; + for (i = 0; i < old_nr; i++) { + if (biosmap[i].size != 0) { + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } + } + chg_nr = chgidx; + + /* sort change-point list by memory addresses (low -> high) */ + still_changing = 1; + while (still_changing) { + still_changing = 0; + for (i = 1; i < chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { + change_tmp = change_point[i]; + change_point[i] = change_point[i-1]; + change_point[i-1] = change_tmp; + still_changing = 1; + } + } + } + + /* create a new bios memory map, removing overlaps */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ + last_type = 0; /* start with undefined memory type */ + last_addr = 0; /* start with 0 as last starting address */ + + /* loop through change-points, determining affect on the new bios map */ + for (chgidx = 0; chgidx < chg_nr; chgidx++) { + /* keep track of all overlapping bios entries */ + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; + } + overlap_entries--; + } + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ + current_type = 0; + for (i = 0; i < overlap_entries; i++) + if (overlap_list[i]->type > current_type) + current_type = overlap_list[i]->type; + /* + * continue building up new bios map based on this + * information + */ + if (current_type != last_type) { + if (last_type != 0) { + new_bios[new_bios_entry].size = + change_point[chgidx]->addr - last_addr; + /* + * move forward only if the new size + * was non-zero + */ + if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ + if (++new_bios_entry >= max_nr_map) + break; + } + if (current_type != 0) { + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; + new_bios[new_bios_entry].type = current_type; + last_addr = change_point[chgidx]->addr; + } + last_type = current_type; + } + } + /* retain count for new bios entries */ + new_nr = new_bios_entry; + + /* copy new bios mapping into original location */ + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); + *pnr_map = new_nr; + + return 0; +} + +static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) +{ + while (nr_map) { + u64 start = biosmap->addr; + u64 size = biosmap->size; + u64 end = start + size; + u32 type = biosmap->type; + + /* Overflow in 64 bits? Ignore the memory map. */ + if (start > end) + return -1; + + e820_add_region(start, size, type); + + biosmap++; + nr_map--; + } + return 0; +} + +/* + * Copy the BIOS e820 map into a safe place. + * + * Sanity-check it while we're at it.. + * + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. + */ +static int __init append_e820_map(struct e820entry *biosmap, int nr_map) +{ + /* Only one memory region (or negative)? Ignore it */ + if (nr_map < 2) + return -1; + + return __append_e820_map(biosmap, nr_map); +} + +static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, + u64 size, unsigned old_type, + unsigned new_type) +{ + int i; + u64 real_updated_size = 0; + + BUG_ON(old_type == new_type); + + if (size > (ULLONG_MAX - start)) + size = ULLONG_MAX - start; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820x->map[i]; + u64 final_start, final_end; + if (ei->type != old_type) + continue; + /* totally covered? */ + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { + ei->type = new_type; + real_updated_size += ei->size; + continue; + } + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(start + size, ei->addr + ei->size); + if (final_start >= final_end) + continue; + e820_add_region(final_start, final_end - final_start, + new_type); + real_updated_size += final_end - final_start; + + ei->size -= final_end - final_start; + if (ei->addr < final_start) + continue; + ei->addr = final_end; + } + return real_updated_size; +} + +u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) +{ + return e820_update_range_map(&e820, start, size, old_type, new_type); +} + +static u64 __init e820_update_range_saved(u64 start, u64 size, + unsigned old_type, unsigned new_type) +{ + return e820_update_range_map(&e820_saved, start, size, old_type, + new_type); +} + +/* make e820 not cover the range */ +u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, + int checktype) +{ + int i; + u64 real_removed_size = 0; + + if (size > (ULLONG_MAX - start)) + size = ULLONG_MAX - start; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 final_start, final_end; + + if (checktype && ei->type != old_type) + continue; + /* totally covered? */ + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { + real_removed_size += ei->size; + memset(ei, 0, sizeof(struct e820entry)); + continue; + } + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(start + size, ei->addr + ei->size); + if (final_start >= final_end) + continue; + real_removed_size += final_end - final_start; + + ei->size -= final_end - final_start; + if (ei->addr < final_start) + continue; + ei->addr = final_end; + } + return real_removed_size; +} + +void __init update_e820(void) +{ + int nr_map; + + nr_map = e820.nr_map; + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) + return; + e820.nr_map = nr_map; + printk(KERN_INFO "modified physical RAM map:\n"); + e820_print_map("modified"); +} +static void __init update_e820_saved(void) +{ + int nr_map; + + nr_map = e820_saved.nr_map; + if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) + return; + e820_saved.nr_map = nr_map; +} +#define MAX_GAP_END 0x100000000ull +/* + * Search for a gap in the e820 memory space from start_addr to end_addr. + */ +__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, + unsigned long start_addr, unsigned long long end_addr) +{ + unsigned long long last; + int i = e820.nr_map; + int found = 0; + + last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; + + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + if (end < start_addr) + continue; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap >= *gapsize) { + *gapsize = gap; + *gapstart = end; + found = 1; + } + } + if (start < last) + last = start; + } + return found; +} + +/* + * Search for the biggest gap in the low 32 bits of the e820 + * memory space. We pass this space to PCI to assign MMIO resources + * for hotplug or unconfigured devices in. + * Hopefully the BIOS let enough space left. + */ +__init void e820_setup_gap(void) +{ + unsigned long gapstart, gapsize, round; + int found; + + gapstart = 0x10000000; + gapsize = 0x400000; + found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); + +#ifdef CONFIG_X86_64 + if (!found) { + gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " + "address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource " + "registers may break!\n"); + } +#endif + + /* + * See how much we want to round up: start off with + * rounding to the next 1MB area. + */ + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; + + printk(KERN_INFO + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); +} + +/** + * Because of the size limitation of struct boot_params, only first + * 128 E820 memory entries are passed to kernel via + * boot_params.e820_map, others are passed via SETUP_E820_EXT node of + * linked list of struct setup_data, which is parsed here. + */ +void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) +{ + u32 map_len; + int entries; + struct e820entry *extmap; + + entries = sdata->len / sizeof(struct e820entry); + map_len = sdata->len + sizeof(struct setup_data); + if (map_len > PAGE_SIZE) + sdata = early_ioremap(pa_data, map_len); + extmap = (struct e820entry *)(sdata->data); + __append_e820_map(extmap, entries); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + if (map_len > PAGE_SIZE) + early_iounmap(sdata, map_len); + printk(KERN_INFO "extended physical RAM map:\n"); + e820_print_map("extended"); +} + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +/** + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for + * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. + */ +void __init e820_mark_nosave_regions(unsigned long limit_pfn) +{ + int i; + unsigned long pfn; + + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (pfn < PFN_UP(ei->addr)) + register_nosave_region(pfn, PFN_UP(ei->addr)); + + pfn = PFN_DOWN(ei->addr + ei->size); + if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) + register_nosave_region(PFN_UP(ei->addr), pfn); + + if (pfn >= limit_pfn) + break; + } +} +#endif + +/* + * Early reserved memory areas. + */ +#define MAX_EARLY_RES 20 + +struct early_res { + u64 start, end; + char name[16]; + char overlap_ok; +}; +static struct early_res early_res[MAX_EARLY_RES] __initdata = { + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, +#endif +#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, +#endif + {} +}; + +static int __init find_overlapped_early(u64 start, u64 end) +{ + int i; + struct early_res *r; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (end > r->start && start < r->end) + break; + } + + return i; +} + +/* + * Drop the i-th range from the early reservation map, + * by copying any higher ranges down one over it, and + * clearing what had been the last slot. + */ +static void __init drop_range(int i) +{ + int j; + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memmove(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + +/* + * Split any existing ranges that: + * 1) are marked 'overlap_ok', and + * 2) overlap with the stated range [start, end) + * into whatever portion (if any) of the existing range is entirely + * below or entirely above the stated range. Drop the portion + * of the existing range that overlaps with the stated range, + * which will allow the caller of this routine to then add that + * stated range without conflicting with any existing range. + */ +static void __init drop_overlaps_that_are_ok(u64 start, u64 end) +{ + int i; + struct early_res *r; + u64 lower_start, lower_end; + u64 upper_start, upper_end; + char name[16]; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + + /* Continue past non-overlapping ranges */ + if (end <= r->start || start >= r->end) + continue; + + /* + * Leave non-ok overlaps as is; let caller + * panic "Overlapping early reservations" + * when it hits this overlap. + */ + if (!r->overlap_ok) + return; + + /* + * We have an ok overlap. We will drop it from the early + * reservation map, and add back in any non-overlapping + * portions (lower or upper) as separate, overlap_ok, + * non-overlapping ranges. + */ + + /* 1. Note any non-overlapping (lower or upper) ranges. */ + strncpy(name, r->name, sizeof(name) - 1); + + lower_start = lower_end = 0; + upper_start = upper_end = 0; + if (r->start < start) { + lower_start = r->start; + lower_end = start; + } + if (r->end > end) { + upper_start = end; + upper_end = r->end; + } + + /* 2. Drop the original ok overlapping range */ + drop_range(i); + + i--; /* resume for-loop on copied down entry */ + + /* 3. Add back in any non-overlapping ranges. */ + if (lower_end) + reserve_early_overlap_ok(lower_start, lower_end, name); + if (upper_end) + reserve_early_overlap_ok(upper_start, upper_end, name); + } +} + +static void __init __reserve_early(u64 start, u64 end, char *name, + int overlap_ok) +{ + int i; + struct early_res *r; + + i = find_overlapped_early(start, end); + if (i >= MAX_EARLY_RES) + panic("Too many early reservations"); + r = &early_res[i]; + if (r->end) + panic("Overlapping early reservations " + "%llx-%llx %s to %llx-%llx %s\n", + start, end - 1, name?name:"", r->start, + r->end - 1, r->name); + r->start = start; + r->end = end; + r->overlap_ok = overlap_ok; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); +} + +/* + * A few early reservtations come here. + * + * The 'overlap_ok' in the name of this routine does -not- mean it + * is ok for these reservations to overlap an earlier reservation. + * Rather it means that it is ok for subsequent reservations to + * overlap this one. + * + * Use this entry point to reserve early ranges when you are doing + * so out of "Paranoia", reserving perhaps more memory than you need, + * just in case, and don't mind a subsequent overlapping reservation + * that is known to be needed. + * + * The drop_overlaps_that_are_ok() call here isn't really needed. + * It would be needed if we had two colliding 'overlap_ok' + * reservations, so that the second such would not panic on the + * overlap with the first. We don't have any such as of this + * writing, but might as well tolerate such if it happens in + * the future. + */ +void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) +{ + drop_overlaps_that_are_ok(start, end); + __reserve_early(start, end, name, 1); +} + +/* + * Most early reservations come here. + * + * We first have drop_overlaps_that_are_ok() drop any pre-existing + * 'overlap_ok' ranges, so that we can then reserve this memory + * range without risk of panic'ing on an overlapping overlap_ok + * early reservation. + */ +void __init reserve_early(u64 start, u64 end, char *name) +{ + drop_overlaps_that_are_ok(start, end); + __reserve_early(start, end, name, 0); +} + +void __init free_early(u64 start, u64 end) +{ + struct early_res *r; + int i; + + i = find_overlapped_early(start, end); + r = &early_res[i]; + if (i >= MAX_EARLY_RES || r->end != end || r->start != start) + panic("free_early on not reserved area: %llx-%llx!", + start, end - 1); + + drop_range(i); +} + +void __init early_res_to_bootmem(u64 start, u64 end) +{ + int i, count; + u64 final_start, final_end; + + count = 0; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) + count++; + + printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); + for (i = 0; i < count; i++) { + struct early_res *r = &early_res[i]; + printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, + r->start, r->end, r->name); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) { + printk(KERN_CONT "\n"); + continue; + } + printk(KERN_CONT " ==> [%010llx - %010llx]\n", + final_start, final_end); + reserve_bootmem_generic(final_start, final_end - final_start, + BOOTMEM_DEFAULT); + } +} + +/* Check for already reserved areas */ +static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) +{ + int i; + u64 addr = *addrp; + int changed = 0; + struct early_res *r; +again: + i = find_overlapped_early(addr, addr + size); + r = &early_res[i]; + if (i < MAX_EARLY_RES && r->end) { + *addrp = addr = round_up(r->end, align); + changed = 1; + goto again; + } + return changed; +} + +/* Check for already reserved areas */ +static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +{ + int i; + u64 addr = *addrp, last; + u64 size = *sizep; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last > r->start && addr < r->start) { + size = r->start - addr; + changed = 1; + goto again; + } + if (last > r->end && addr < r->end) { + addr = round_up(r->end, align); + size = last - addr; + changed = 1; + goto again; + } + if (last <= r->end && addr >= r->start) { + (*sizep)++; + return 0; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} + +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + while (bad_addr(&addr, size, align) && addr+size <= ei_last) + ; + last = addr + size; + if (last > ei_last) + continue; + if (last > end) + continue; + return addr; + } + return -1ULL; +} + +/* + * Find next free range after *start + */ +u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && + addr + *sizep <= ei_last) + ; + last = addr + *sizep; + if (last > ei_last) + continue; + return addr; + } + return -1UL; + +} + +/* + * pre allocated 4k and reserved it in e820 + */ +u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) +{ + u64 size = 0; + u64 addr; + u64 start; + + start = startt; + while (size < sizet) + start = find_e820_area_size(start, &size, align); + + if (size < sizet) + return 0; + + addr = round_down(start + size - sizet, align); + e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); + e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); + printk(KERN_INFO "update e820 for early_reserve_e820\n"); + update_e820(); + update_e820_saved(); + + return addr; +} + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_PAE +# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) +# else +# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) +# endif +#else /* CONFIG_X86_32 */ +# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT +#endif + +/* + * Last pfn which the user wants to use. + */ +unsigned long __initdata end_user_pfn = MAX_ARCH_PFN; + +/* + * Find the highest page frame number we have available + */ +unsigned long __init e820_end(void) +{ + int i; + unsigned long last_pfn = 0; + unsigned long max_arch_pfn = MAX_ARCH_PFN; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long end_pfn; + + if (ei->type != E820_RAM) + continue; + + end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; + if (end_pfn > last_pfn) + last_pfn = end_pfn; + } + + if (last_pfn > max_arch_pfn) + last_pfn = max_arch_pfn; + if (last_pfn > end_user_pfn) + last_pfn = end_user_pfn; + + printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", + last_pfn, max_arch_pfn); + return last_pfn; +} + +/* + * Finds an active region in the address range from start_pfn to last_pfn and + * returns its range in ei_startpfn and ei_endpfn for the e820 entry. + */ +int __init e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + u64 align = PAGE_SIZE; + + *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Skip if map is outside the node */ + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || + *ei_startpfn >= last_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; + + /* Obey end_user_pfn to save on memmap */ + if (*ei_startpfn >= end_user_pfn) + return 0; + if (*ei_endpfn > end_user_pfn) + *ei_endpfn = end_user_pfn; + + return 1; +} + +/* Walk the e820 map and register active regions within a node */ +void __init e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned long ei_startpfn; + unsigned long ei_endpfn; + int i; + + for (i = 0; i < e820.nr_map; i++) + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + add_active_range(nid, ei_startpfn, ei_endpfn); +} + +/* + * Find the hole size (in bytes) in the memory range. + * @start: starting address of the memory range to scan + * @end: ending address of the memory range to scan + */ +u64 __init e820_hole_size(u64 start, u64 end) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long last_pfn = end >> PAGE_SHIFT; + unsigned long ei_startpfn, ei_endpfn, ram = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + } + return end - start - ((u64)ram << PAGE_SHIFT); +} + +static void early_panic(char *msg) +{ + early_printk(msg); + panic(msg); +} + +/* "mem=nopentium" disables the 4MB page tables. */ +static int __init parse_memopt(char *p) +{ + u64 mem_size; + + if (!p) + return -EINVAL; + +#ifdef CONFIG_X86_32 + if (!strcmp(p, "nopentium")) { + setup_clear_cpu_cap(X86_FEATURE_PSE); + return 0; + } +#endif + + mem_size = memparse(p, &p); + end_user_pfn = mem_size>>PAGE_SHIFT; + e820_update_range(mem_size, ULLONG_MAX - mem_size, + E820_RAM, E820_RESERVED); + + return 0; +} +early_param("mem", parse_memopt); + +static int userdef __initdata; + +static int __init parse_memmap_opt(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (!p) + return -EINVAL; + + if (!strcmp(p, "exactmap")) { +#ifdef CONFIG_CRASH_DUMP + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is + * reset. + */ + saved_max_pfn = e820_end(); +#endif + e820.nr_map = 0; + userdef = 1; + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + userdef = 1; + if (*p == '@') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RAM); + } else if (*p == '#') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_ACPI); + } else if (*p == '$') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RESERVED); + } else { + end_user_pfn = (mem_size >> PAGE_SHIFT); + e820_update_range(mem_size, ULLONG_MAX - mem_size, + E820_RAM, E820_RESERVED); + } + return *p == '\0' ? 0 : -EINVAL; +} +early_param("memmap", parse_memmap_opt); + +void __init finish_e820_parsing(void) +{ + if (userdef) { + int nr = e820.nr_map; + + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) + early_panic("Invalid user supplied memory map"); + e820.nr_map = nr; + + printk(KERN_INFO "user-defined physical RAM map:\n"); + e820_print_map("user"); + } +} + +static inline const char *e820_type_to_string(int e820_type) +{ + switch (e820_type) { + case E820_RESERVED_KERN: + case E820_RAM: return "System RAM"; + case E820_ACPI: return "ACPI Tables"; + case E820_NVS: return "ACPI Non-volatile Storage"; + default: return "reserved"; + } +} + +/* + * Mark e820 reserved areas as busy for the resource manager. + */ +void __init e820_reserve_resources(void) +{ + int i; + struct resource *res; + u64 end; + + res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + for (i = 0; i < e820.nr_map; i++) { + end = e820.map[i].addr + e820.map[i].size - 1; +#ifndef CONFIG_RESOURCES_64BIT + if (end > 0x100000000ULL) { + res++; + continue; + } +#endif + res->name = e820_type_to_string(e820.map[i].type); + res->start = e820.map[i].addr; + res->end = end; + + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + insert_resource(&iomem_resource, res); + res++; + } + + for (i = 0; i < e820_saved.nr_map; i++) { + struct e820entry *entry = &e820_saved.map[i]; + firmware_map_add_early(entry->addr, + entry->addr + entry->size - 1, + e820_type_to_string(entry->type)); + } +} + +char *__init default_machine_specific_memory_setup(void) +{ + char *who = "BIOS-e820"; + int new_nr; + /* + * Try to copy the BIOS-supplied E820-map. + * + * Otherwise fake a memory map; one section from 0k->640k, + * the next section from 1mb->appropriate_mem_k + */ + new_nr = boot_params.e820_entries; + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &new_nr); + boot_params.e820_entries = new_nr; + if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) + < 0) { + u64 mem_size; + + /* compare results from other methods and take the greater */ + if (boot_params.alt_mem_k + < boot_params.screen_info.ext_mem_k) { + mem_size = boot_params.screen_info.ext_mem_k; + who = "BIOS-88"; + } else { + mem_size = boot_params.alt_mem_k; + who = "BIOS-e801"; + } + + e820.nr_map = 0; + e820_add_region(0, LOWMEMSIZE(), E820_RAM); + e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); + } + + /* In case someone cares... */ + return who; +} + +char *__init __attribute__((weak)) machine_specific_memory_setup(void) +{ + return default_machine_specific_memory_setup(); +} + +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +char * __init __attribute__((weak)) memory_setup(void) +{ + return machine_specific_memory_setup(); +} + +void __init setup_memory_map(void) +{ + char *who; + + who = memory_setup(); + memcpy(&e820_saved, &e820, sizeof(struct e820map)); + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + e820_print_map(who); +} + +#ifdef CONFIG_X86_64 +int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) +{ + int i; + + if (slot < 0 || slot >= e820.nr_map) + return -1; + for (i = slot; i < e820.nr_map; i++) { + if (e820.map[i].type != E820_RAM) + continue; + break; + } + if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) + return -1; + *addr = e820.map[i].addr; + *size = min_t(u64, e820.map[i].size + e820.map[i].addr, + max_pfn << PAGE_SHIFT) - *addr; + return i + 1; +} +#endif diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c deleted file mode 100644 index ed733e7cf4e6..000000000000 --- a/arch/x86/kernel/e820_32.c +++ /dev/null @@ -1,775 +0,0 @@ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/ioport.h> -#include <linux/string.h> -#include <linux/kexec.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/pfn.h> -#include <linux/uaccess.h> -#include <linux/suspend.h> - -#include <asm/pgtable.h> -#include <asm/page.h> -#include <asm/e820.h> -#include <asm/setup.h> - -struct e820map e820; -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif -extern int user_defined_memmap; - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -#define ROMSIGNATURE 0xaa55 - -static int __init romsignature(const unsigned char *rom) -{ - const unsigned short * const ptr = (const unsigned short *)rom; - unsigned short sig; - - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; -} - -static int __init romchecksum(const unsigned char *rom, unsigned long length) -{ - unsigned char sum, c; - - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) - sum += c; - return !length && !sum; -} - -static void __init probe_roms(void) -{ - const unsigned char *rom; - unsigned long start, length, upper; - unsigned char c; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - -/* - * Request address space for all standard RAM and ROM resources - * and also for regions reported as reserved by the e820. - */ -void __init init_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource) -{ - int i; - - probe_roms(); - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; -#ifndef CONFIG_RESOURCES_64BIT - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; -#endif - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res)) { - kfree(res); - continue; - } - if (e820.map[i].type == E820_RAM) { - /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. - */ - request_resource(res, code_resource); - request_resource(res, data_resource); - request_resource(res, bss_resource); -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) - request_resource(res, &crashk_res); -#endif - } - } -} - -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -/** - * e820_mark_nosave_regions - Find the ranges of physical addresses that do not - * correspond to e820 RAM areas and mark the corresponding pages as nosave for - * hibernation. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= max_low_pfn) - break; - } -} -#endif - -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} /* add_memory_region */ - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) { - return -1; - } - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; i<old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { - return -1; - } - - /* create pointers for initial change-point information (for sorting) */ - for (i=0; i < 2*old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i=0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if <current_addr> > <last_addr>, swap */ - /* or, if current=<start_addr> & last=<end_addr>, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; i<overlap_entries; i++) - { - if (overlap_list[i] == change_point[chgidx]->pbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; i<overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - - return 0; -} - -/* - * Find the highest page frame number we have available - */ -void __init propagate_e820_map(void) -{ - int i; - - max_pfn = 0; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - /* RAM? */ - if (e820.map[i].type != E820_RAM) - continue; - start = PFN_UP(e820.map[i].addr); - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - if (start >= end) - continue; - if (end > max_pfn) - max_pfn = end; - memory_present(0, start, end); - } -} - -/* - * Register fully available low RAM pages with the bootmem allocator. - */ -void __init register_bootmem_low_pages(unsigned long max_low_pfn) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long curr_pfn, last_pfn, size; - /* - * Reserve usable low memory - */ - if (e820.map[i].type != E820_RAM) - continue; - /* - * We are rounding up the start address of usable memory: - */ - curr_pfn = PFN_UP(e820.map[i].addr); - if (curr_pfn >= max_low_pfn) - continue; - /* - * ... and at the end of the usable range downwards: - */ - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = last_pfn - curr_pfn; - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); - } -} - -void __init e820_register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - -void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; - } - } -} - -void __init limit_regions(unsigned long long size) -{ - unsigned long long current_addr; - int i; - - print_memory_map("limit_regions start"); - for (i = 0; i < e820.nr_map; i++) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr < size) - continue; - - if (e820.map[i].type != E820_RAM) - continue; - - if (e820.map[i].addr >= size) { - /* - * This region starts past the end of the - * requested size, skip it completely. - */ - e820.nr_map = i; - } else { - e820.nr_map = i + 1; - e820.map[i].size -= current_addr - size; - } - print_memory_map("limit_regions endfor"); - return; - } - print_memory_map("limit_regions endfunc"); -} - -/* - * This function checks if any part of the range <start,end> is mapped - * with type. - */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - const struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - - /* - * This function checks if the entire range <start,end> is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of <start,end> we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - -static int __init parse_memmap(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "exactmap") == 0) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - propagate_e820_map(); - saved_max_pfn = max_pfn; -#endif - e820.nr_map = 0; - user_defined_memmap = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(arg, &arg); - if (*arg == '@') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*arg == '#') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*arg == '$') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - user_defined_memmap = 1; - } - } - return 0; -} -early_param("memmap", parse_memmap); -void __init update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - int i; - - BUG_ON(old_type == new_type); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - if (ei->type != old_type) - continue; - /* totally covered? */ - if (ei->addr >= start && ei->size <= size) { - ei->type = new_type; - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - add_memory_region(final_start, final_end - final_start, - new_type); - } -} -void __init update_e820(void) -{ - u8 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - print_memory_map("modified"); -} diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c deleted file mode 100644 index 124480c0008d..000000000000 --- a/arch/x86/kernel/e820_64.c +++ /dev/null @@ -1,952 +0,0 @@ -/* - * Handle the memory map. - * The functions here do the job until bootmem takes over. - * - * Getting sanitize_e820_map() in sync with i386 version by applying change: - * - Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach <[email protected]>, December 2002. - * Venkatesh Pallipadi <[email protected]> - * - */ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/ioport.h> -#include <linux/string.h> -#include <linux/kexec.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/suspend.h> -#include <linux/pfn.h> - -#include <asm/pgtable.h> -#include <asm/page.h> -#include <asm/e820.h> -#include <asm/proto.h> -#include <asm/setup.h> -#include <asm/sections.h> -#include <asm/kdebug.h> -#include <asm/trampoline.h> - -struct e820map e820; - -/* - * PFN of last memory page. - */ -unsigned long end_pfn; - -/* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long max_pfn_mapped; - -/* - * Last pfn which the user wants to use. - */ -static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; - -/* - * Early reserved memory areas. - */ -#define MAX_EARLY_RES 20 - -struct early_res { - unsigned long start, end; - char name[16]; -}; -static struct early_res early_res[MAX_EARLY_RES] __initdata = { - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ -#ifdef CONFIG_X86_TRAMPOLINE - { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, -#endif - {} -}; - -void __init reserve_early(unsigned long start, unsigned long end, char *name) -{ - int i; - struct early_res *r; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", - start, end - 1, name?name:"", r->start, r->end - 1, r->name); - } - if (i >= MAX_EARLY_RES) - panic("Too many early reservations"); - r = &early_res[i]; - r->start = start; - r->end = end; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); -} - -void __init free_early(unsigned long start, unsigned long end) -{ - struct early_res *r; - int i, j; - - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (start == r->start && end == r->end) - break; - } - if (i >= MAX_EARLY_RES || !early_res[i].end) - panic("free_early on not reserved area: %lx-%lx!", start, end); - - for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; -} - -void __init early_res_to_bootmem(unsigned long start, unsigned long end) -{ - int i; - unsigned long final_start, final_end; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) - continue; - printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, - final_start, final_end - 1, r->name); - reserve_bootmem_generic(final_start, final_end - final_start); - } -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last >= r->start && addr < r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - unsigned long size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} -/* - * This function checks if any part of the range <start,end> is mapped - * with type. - */ -int -e820_any_mapped(unsigned long start, unsigned long end, unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - -/* - * This function checks if the entire range <start,end> is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init e820_all_mapped(unsigned long start, unsigned long end, - unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - - /* if the region is at the beginning of <start,end> we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* - * if start is now at or beyond end, we're done, full - * coverage - */ - if (start >= end) - return 1; - } - return 0; -} - -/* - * Find a free area with specified alignment in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - continue; - if (last > end) - continue; - return addr; - } - return -1UL; -} - -/* - * Find next free range after *start - */ -unsigned long __init find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && - addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - continue; - return addr; - } - return -1UL; - -} -/* - * Find the highest page frame number we have available - */ -unsigned long __init e820_end_of_ram(void) -{ - unsigned long end_pfn; - - end_pfn = find_max_pfn_with_active_regions(); - - if (end_pfn > max_pfn_mapped) - max_pfn_mapped = end_pfn; - if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT) - max_pfn_mapped = MAXMEM>>PAGE_SHIFT; - if (end_pfn > end_user_pfn) - end_pfn = end_user_pfn; - if (end_pfn > max_pfn_mapped) - end_pfn = max_pfn_mapped; - - printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped); - return end_pfn; -} - -/* - * Mark e820 reserved areas as busy for the resource manager. - */ -void __init e820_reserve_resources(void) -{ - int i; - struct resource *res; - - res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); - for (i = 0; i < e820.nr_map; i++) { - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - res++; - } -} - -/* - * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for software - * suspend and suspend to RAM. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long paddr; - - paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (paddr < ei->addr) - register_nosave_region(PFN_DOWN(paddr), - PFN_UP(ei->addr)); - - paddr = round_down(ei->addr + ei->size, PAGE_SIZE); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), - PFN_DOWN(paddr)); - - if (paddr >= (end_pfn << PAGE_SHIFT)) - break; - } -} - -/* - * Finds an active region in the address range from start_pfn to end_pfn and - * returns its range in ei_startpfn and ei_endpfn for the e820 entry. - */ -static int __init e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long end_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn) -{ - *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; - *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (*ei_startpfn >= *ei_endpfn) - return 0; - - /* Check if max_pfn_mapped should be updated */ - if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped) - max_pfn_mapped = *ei_endpfn; - - /* Skip if map is outside the node */ - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || - *ei_startpfn >= end_pfn) - return 0; - - /* Check for overlaps */ - if (*ei_startpfn < start_pfn) - *ei_startpfn = start_pfn; - if (*ei_endpfn > end_pfn) - *ei_endpfn = end_pfn; - - /* Obey end_user_pfn to save on memmap */ - if (*ei_startpfn >= end_user_pfn) - return 0; - if (*ei_endpfn > end_user_pfn) - *ei_endpfn = end_user_pfn; - - return 1; -} - -/* Walk the e820 map and register active regions within a node */ -void __init -e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long ei_startpfn; - unsigned long ei_endpfn; - int i; - - for (i = 0; i < e820.nr_map; i++) - if (e820_find_active_region(&e820.map[i], - start_pfn, end_pfn, - &ei_startpfn, &ei_endpfn)) - add_active_range(nid, ei_startpfn, ei_endpfn); -} - -/* - * Add a memory region to the kernel e820 map. - */ -void __init add_memory_region(unsigned long start, unsigned long size, int type) -{ - int x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} - -/* - * Find the hole size (in bytes) in the memory range. - * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -unsigned long __init e820_hole_size(unsigned long start, unsigned long end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn, ei_endpfn, ram = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - if (e820_find_active_region(&e820.map[i], - start_pfn, end_pfn, - &ei_startpfn, &ei_endpfn)) - ram += ei_endpfn - ei_startpfn; - } - return end - start - (ram << PAGE_SHIFT); -} - -static void __init e820_print_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) - (e820.map[i].addr + e820.map[i].size)); - switch (e820.map[i].type) { - case E820_RAM: - printk(KERN_CONT "(usable)\n"); - break; - case E820_RESERVED: - printk(KERN_CONT "(reserved)\n"); - break; - case E820_ACPI: - printk(KERN_CONT "(ACPI data)\n"); - break; - case E820_NVS: - printk(KERN_CONT "(ACPI NVS)\n"); - break; - default: - printk(KERN_CONT "type %u\n", e820.map[i].type); - break; - } - } -} - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) -{ - struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ - }; - static struct change_member change_point_list[2*E820MAX] __initdata; - static struct change_member *change_point[2*E820MAX] __initdata; - static struct e820entry *overlap_list[E820MAX] __initdata; - static struct e820entry new_bios[E820MAX] __initdata; - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following - (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) - return -1; - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i = 0; i < old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) - return -1; - - /* create pointers for initial change-point information (for sorting) */ - for (i = 0; i < 2 * old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i = 0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + - biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i = 1; i < chg_nr; i++) { - unsigned long long curaddr, lastaddr; - unsigned long long curpbaddr, lastpbaddr; - - curaddr = change_point[i]->addr; - lastaddr = change_point[i - 1]->addr; - curpbaddr = change_point[i]->pbios->addr; - lastpbaddr = change_point[i - 1]->pbios->addr; - - /* - * swap entries, when: - * - * curaddr > lastaddr or - * curaddr == lastaddr and curaddr == curpbaddr and - * lastaddr != lastpbaddr - */ - if (curaddr < lastaddr || - (curaddr == lastaddr && curaddr == curpbaddr && - lastaddr != lastpbaddr)) { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing = 1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries = 0; /* number of entries in the overlap table */ - new_bios_entry = 0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - - /* loop through change-points, determining affect on the new bios map */ - for (chgidx = 0; chgidx < chg_nr; chgidx++) { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == - change_point[chgidx]->pbios->addr) { - /* - * add map entry to overlap list (> 1 entry - * implies an overlap) - */ - overlap_list[overlap_entries++] = - change_point[chgidx]->pbios; - } else { - /* - * remove entry from list (order independent, - * so swap with last) - */ - for (i = 0; i < overlap_entries; i++) { - if (overlap_list[i] == - change_point[chgidx]->pbios) - overlap_list[i] = - overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* - * if there are overlapping entries, decide which - * "type" to use (larger value takes precedence -- - * 1=usable, 2,3,4,4+=unusable) - */ - current_type = 0; - for (i = 0; i < overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* - * continue building up new bios map based on this - * information - */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* - * move forward only if the new size - * was non-zero - */ - if (new_bios[new_bios_entry].size != 0) - /* - * no more space left for new - * bios entries ? - */ - if (++new_bios_entry >= E820MAX) - break; - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = - change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr = change_point[chgidx]->addr; - } - last_type = current_type; - } - } - /* retain count for new bios entries */ - new_nr = new_bios_entry; - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - */ -static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - return 0; -} - -static void early_panic(char *msg) -{ - early_printk(msg); - panic(msg); -} - -/* We're not void only for x86 32-bit compat */ -char * __init machine_specific_memory_setup(void) -{ - char *who = "BIOS-e820"; - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); - if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) - early_panic("Cannot find a valid memory map"); - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - e820_print_map(who); - - /* In case someone cares... */ - return who; -} - -static int __init parse_memopt(char *p) -{ - if (!p) - return -EINVAL; - end_user_pfn = memparse(p, &p); - end_user_pfn >>= PAGE_SHIFT; - return 0; -} -early_param("mem", parse_memopt); - -static int userdef __initdata; - -static int __init parse_memmap_opt(char *p) -{ - char *oldp; - unsigned long long start_at, mem_size; - - if (!strcmp(p, "exactmap")) { -#ifdef CONFIG_CRASH_DUMP - /* - * If we are doing a crash dump, we still need to know - * the real mem size before original memory map is - * reset. - */ - e820_register_active_regions(0, 0, -1UL); - saved_max_pfn = e820_end_of_ram(); - remove_all_active_ranges(); -#endif - max_pfn_mapped = 0; - e820.nr_map = 0; - userdef = 1; - return 0; - } - - oldp = p; - mem_size = memparse(p, &p); - if (p == oldp) - return -EINVAL; - - userdef = 1; - if (*p == '@') { - start_at = memparse(p+1, &p); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*p == '#') { - start_at = memparse(p+1, &p); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*p == '$') { - start_at = memparse(p+1, &p); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - end_user_pfn = (mem_size >> PAGE_SHIFT); - } - return *p == '\0' ? 0 : -EINVAL; -} -early_param("memmap", parse_memmap_opt); - -void __init finish_e820_parsing(void) -{ - if (userdef) { - char nr = e820.nr_map; - - if (sanitize_e820_map(e820.map, &nr) < 0) - early_panic("Invalid user supplied memory map"); - e820.nr_map = nr; - - printk(KERN_INFO "user-defined physical RAM map:\n"); - e820_print_map("user"); - } -} - -void __init update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - int i; - - BUG_ON(old_type == new_type); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - if (ei->type != old_type) - continue; - /* totally covered? */ - if (ei->addr >= start && ei->size <= size) { - ei->type = new_type; - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - add_memory_region(final_start, final_end - final_start, - new_type); - } -} - -void __init update_e820(void) -{ - u8 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - e820_print_map("modified"); -} - -unsigned long pci_mem_start = 0xaeedbabe; -EXPORT_SYMBOL(pci_mem_start); - -/* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. We pass this space to PCI to assign MMIO resources - * for hotplug or unconfigured devices in. - * Hopefully the BIOS let enough space left. - */ -__init void e820_setup_gap(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long last; - int i; - int found = 0; - - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - found = 1; - } - } - if (start < last) - last = start; - } - - if (!found) { - gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " - "address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource " - "registers may break!\n"); - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk(KERN_INFO - "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); -} - -int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) -{ - int i; - - if (slot < 0 || slot >= e820.nr_map) - return -1; - for (i = slot; i < e820.nr_map; i++) { - if (e820.map[i].type != E820_RAM) - continue; - break; - } - if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) - return -1; - *addr = e820.map[i].addr; - *size = min_t(u64, e820.map[i].size + e820.map[i].addr, - max_pfn << PAGE_SHIFT) - *addr; - return i + 1; -} diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9f51e1ea9e82..a4665f37cfc5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -50,7 +50,7 @@ static void __init fix_hypertransport_config(int num, int slot, int func) static void __init via_bugs(int num, int slot, int func) { #ifdef CONFIG_GART_IOMMU - if ((end_pfn > MAX_DMA32_PFN || force_iommu) && + if ((max_pfn > MAX_DMA32_PFN || force_iommu) && !gart_iommu_aperture_allowed) { printk(KERN_INFO "Looks like a VIA chipset. Disabling IOMMU." @@ -98,17 +98,6 @@ static void __init nvidia_bugs(int num, int slot, int func) } -static void __init ati_bugs(int num, int slot, int func) -{ -#ifdef CONFIG_X86_IO_APIC - if (timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); - } -#endif -} - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -126,8 +115,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, { PCI_VENDOR_ID_VIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, - { PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, {} diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 77d424cf68b3..94382faeadb6 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -64,6 +64,17 @@ static int __init setup_noefi(char *arg) } early_param("noefi", setup_noefi); +int add_efi_memmap; +EXPORT_SYMBOL(add_efi_memmap); + +static int __init setup_add_efi_memmap(char *arg) +{ + add_efi_memmap = 1; + return 0; +} +early_param("add_efi_memmap", setup_add_efi_memmap); + + static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { return efi_call_virt2(get_time, tm, tc); @@ -213,6 +224,50 @@ unsigned long efi_get_time(void) eft.minute, eft.second); } +/* + * Tell the kernel about the EFI memory map. This might include + * more than the max 128 entries that can fit in the e820 legacy + * (zeropage) memory map. + */ + +static void __init do_add_efi_memmap(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + int e820_type; + + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_RAM; + else + e820_type = E820_RESERVED; + e820_add_region(start, size, e820_type); + } + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +} + +void __init efi_reserve_early(void) +{ + unsigned long pmap; + +#ifdef CONFIG_X86_32 + pmap = boot_params.efi_info.efi_memmap; +#else + pmap = (boot_params.efi_info.efi_memmap | + ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); +#endif + memmap.phys_map = (void *)pmap; + memmap.nr_map = boot_params.efi_info.efi_memmap_size / + boot_params.efi_info.efi_memdesc_size; + memmap.desc_version = boot_params.efi_info.efi_memdesc_version; + memmap.desc_size = boot_params.efi_info.efi_memdesc_size; + reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, + "EFI memmap"); +} + #if EFI_DEBUG static void __init print_efi_memmap(void) { @@ -244,19 +299,11 @@ void __init efi_init(void) #ifdef CONFIG_X86_32 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; - memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; #else efi_phys.systab = (efi_system_table_t *) (boot_params.efi_info.efi_systab | ((__u64)boot_params.efi_info.efi_systab_hi<<32)); - memmap.phys_map = (void *) - (boot_params.efi_info.efi_memmap | - ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); #endif - memmap.nr_map = boot_params.efi_info.efi_memmap_size / - boot_params.efi_info.efi_memdesc_size; - memmap.desc_version = boot_params.efi_info.efi_memdesc_version; - memmap.desc_size = boot_params.efi_info.efi_memdesc_size; efi.systab = early_ioremap((unsigned long)efi_phys.systab, sizeof(efi_system_table_t)); @@ -370,6 +417,8 @@ void __init efi_init(void) if (memmap.desc_size != sizeof(efi_memory_desc_t)) printk(KERN_WARNING "Kernel-defined memdesc" "doesn't match the one from EFI!\n"); + if (add_efi_memmap) + do_add_efi_memmap(); /* Setup for EFI runtime service */ reboot_type = BOOT_EFI; diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 5d23d85624d4..4b63c8e1f13b 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -49,13 +49,13 @@ void efi_call_phys_prelog(void) local_irq_save(efi_rt_eflags); /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in + * If I don't have PAE, I should just duplicate two entries in page + * directory. If I have PAE, I just need to duplicate one entry in * page directory. */ cr4 = read_cr4(); - if (cr4 & X86_CR4_PSE) { + if (cr4 & X86_CR4_PAE) { efi_bak_pg_dir_pointer[0].pgd = swapper_pg_dir[pgd_index(0)].pgd; swapper_pg_dir[0].pgd = @@ -93,7 +93,7 @@ void efi_call_phys_epilog(void) cr4 = read_cr4(); - if (cr4 & X86_CR4_PSE) { + if (cr4 & X86_CR4_PAE) { swapper_pg_dir[pgd_index(0)].pgd = efi_bak_pg_dir_pointer[0].pgd; } else { diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d0060fdcccac..652c5287215f 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void) early_runtime_code_mapping_set_exec(0); } -void __init efi_reserve_bootmem(void) -{ - reserve_bootmem_generic((unsigned long)memmap.phys_map, - memmap.nr_map * memmap.desc_size); -} - -void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) { static unsigned pages_mapped __initdata; unsigned i, pages; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c778e4fa55a2..53393c306e11 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -51,14 +51,14 @@ #include <asm/percpu.h> #include <asm/dwarf2.h> #include <asm/processor-flags.h> -#include "irq_vectors.h" +#include <asm/irq_vectors.h> /* * We use macros for low-level operations which need to be overridden * for paravirtualization. The following will never clobber any registers: * INTERRUPT_RETURN (aka. "iret") * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). * * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). @@ -349,7 +349,7 @@ sysenter_past_esp: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs - ENABLE_INTERRUPTS_SYSCALL_RET + ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC .pushsection .fixup,"ax" 2: movl $0,PT_FS(%esp) @@ -874,10 +874,10 @@ ENTRY(native_iret) .previous END(native_iret) -ENTRY(native_irq_enable_syscall_ret) +ENTRY(native_irq_enable_sysexit) sti sysexit -END(native_irq_enable_syscall_ret) +END(native_irq_enable_sysexit) #endif KPROBE_ENTRY(int3) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 556a8df522a7..466b9284ed2f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -59,8 +59,7 @@ #endif #ifdef CONFIG_PARAVIRT -ENTRY(native_irq_enable_syscall_ret) - movq %gs:pda_oldrsp,%rsp +ENTRY(native_usergs_sysret64) swapgs sysretq #endif /* CONFIG_PARAVIRT */ @@ -104,7 +103,7 @@ ENTRY(native_irq_enable_syscall_ret) .macro FAKE_STACK_FRAME child_rip /* push in order ss, rsp, eflags, cs, rip */ xorl %eax, %eax - pushq %rax /* ss */ + pushq $__KERNEL_DS /* ss */ CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET ss,0*/ pushq %rax /* rsp */ @@ -169,13 +168,13 @@ ENTRY(ret_from_fork) CFI_ADJUST_CFA_OFFSET -4 call schedule_tail GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) jnz rff_trace rff_action: RESTORE_REST testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? je int_ret_from_sys_call - testl $_TIF_IA32,threadinfo_flags(%rcx) + testl $_TIF_IA32,TI_flags(%rcx) jnz int_ret_from_sys_call RESTORE_TOP_OF_STACK %rdi,ARGOFFSET jmp ret_from_sys_call @@ -244,7 +243,8 @@ ENTRY(system_call_after_swapgs) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ + TI_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -263,7 +263,7 @@ sysret_check: GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - movl threadinfo_flags(%rcx),%edx + movl TI_flags(%rcx),%edx andl %edi,%edx jnz sysret_careful CFI_REMEMBER_STATE @@ -275,7 +275,8 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - ENABLE_INTERRUPTS_SYSCALL_RET + movq %gs:pda_oldrsp, %rsp + USERGS_SYSRET64 CFI_RESTORE_STATE /* Handle reschedules */ @@ -347,10 +348,10 @@ int_ret_from_sys_call: int_with_check: LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%edx + movl TI_flags(%rcx),%edx andl %edi,%edx jnz int_careful - andl $~TS_COMPAT,threadinfo_status(%rcx) + andl $~TS_COMPAT,TI_status(%rcx) jmp retint_swapgs /* Either reschedule or signal or syscall exit tracking needed. */ @@ -420,7 +421,6 @@ END(\label) PTREGSCALL stub_clone, sys_clone, %r8 PTREGSCALL stub_fork, sys_fork, %rdi PTREGSCALL stub_vfork, sys_vfork, %rdi - PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx PTREGSCALL stub_iopl, sys_iopl, %rsi @@ -559,7 +559,7 @@ retint_with_reschedule: movl $_TIF_WORK_MASK,%edi retint_check: LOCKDEP_SYS_EXIT_IRQ - movl threadinfo_flags(%rcx),%edx + movl TI_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful @@ -655,9 +655,9 @@ retint_signal: /* Returning to kernel space. Check if we need preemption */ /* rcx: threadinfo. interrupts off. */ ENTRY(retint_kernel) - cmpl $0,threadinfo_preempt_count(%rcx) + cmpl $0,TI_preempt_count(%rcx) jnz retint_restore_args - bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) + bt $TIF_NEED_RESCHED,TI_flags(%rcx) jnc retint_restore_args bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args @@ -720,6 +720,10 @@ ENTRY(apic_timer_interrupt) apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt END(apic_timer_interrupt) +ENTRY(uv_bau_message_intr1) + apicinterrupt 220,uv_bau_message_interrupt +END(uv_bau_message_intr1) + ENTRY(error_interrupt) apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt END(error_interrupt) @@ -733,6 +737,7 @@ END(spurious_interrupt) */ .macro zeroentry sym INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $0 /* push error code/oldrax */ CFI_ADJUST_CFA_OFFSET 8 pushq %rax /* push real oldrax to the rdi slot */ @@ -745,6 +750,7 @@ END(spurious_interrupt) .macro errorentry sym XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq %rax CFI_ADJUST_CFA_OFFSET 8 CFI_REL_OFFSET rax,0 @@ -814,7 +820,7 @@ paranoid_restore\trace: jmp irq_return paranoid_userspace\trace: GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%ebx + movl TI_flags(%rcx),%ebx andl $_TIF_WORK_MASK,%ebx jz paranoid_swapgs\trace movq %rsp,%rdi /* &pt_regs */ @@ -912,7 +918,7 @@ error_exit: testl %eax,%eax jne retint_kernel LOCKDEP_SYS_EXIT_IRQ - movl threadinfo_flags(%rcx),%edx + movl TI_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful @@ -926,11 +932,11 @@ error_kernelspace: iret run with kernel gs again, so don't set the user space flag. B stepping K8s sometimes report an truncated RIP for IRET exceptions returning to compat mode. Check for these here too. */ - leaq irq_return(%rip),%rbp - cmpq %rbp,RIP(%rsp) + leaq irq_return(%rip),%rcx + cmpq %rcx,RIP(%rsp) je error_swapgs - movl %ebp,%ebp /* zero extend */ - cmpq %rbp,RIP(%rsp) + movl %ecx,%ecx /* zero extend */ + cmpq %rcx,RIP(%rsp) je error_swapgs cmpq $gs_change,RIP(%rsp) je error_swapgs @@ -939,7 +945,7 @@ KPROBE_END(error_entry) /* Reload gs selector with exception handling */ /* edi: new selector */ -ENTRY(load_gs_index) +ENTRY(native_load_gs_index) CFI_STARTPROC pushf CFI_ADJUST_CFA_OFFSET 8 @@ -953,7 +959,7 @@ gs_change: CFI_ADJUST_CFA_OFFSET -8 ret CFI_ENDPROC -ENDPROC(load_gs_index) +ENDPROC(native_load_gs_index) .section __ex_table,"a" .align 8 @@ -1120,10 +1126,6 @@ ENTRY(coprocessor_segment_overrun) zeroentry do_coprocessor_segment_overrun END(coprocessor_segment_overrun) -ENTRY(reserved) - zeroentry do_reserved -END(reserved) - /* runs on exception stack */ ENTRY(double_fault) XCPT_FRAME diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index cbaaf69bedb2..1fa8be5bd217 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -51,7 +51,7 @@ void __init setup_apic_routing(void) else #endif - if (num_possible_cpus() <= 8) + if (max_physical_apicid < 8) genapic = &apic_flat; else genapic = &apic_physflat; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index ebf13908a743..711f11c30b06 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -5,9 +5,10 @@ * * SGI UV APIC functions (note: not an Intel compatible APIC) * - * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ +#include <linux/kernel.h> #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/string.h> @@ -20,6 +21,7 @@ #include <asm/smp.h> #include <asm/ipi.h> #include <asm/genapic.h> +#include <asm/pgtable.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -55,37 +57,37 @@ static cpumask_t uv_vector_allocation_domain(int cpu) int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) { unsigned long val; - int nasid; + int pnode; - nasid = uv_apicid_to_nasid(phys_apicid); + pnode = uv_apicid_to_pnode(phys_apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); mdelay(10); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_STARTUP; - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); return 0; } static void uv_send_IPI_one(int cpu, int vector) { unsigned long val, apicid, lapicid; - int nasid; + int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ lapicid = apicid & 0x3f; /* ZZZ macro needed */ - nasid = uv_apicid_to_nasid(apicid); + pnode = uv_apicid_to_pnode(apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << UVH_IPI_INT_APIC_ID_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } static void uv_send_IPI_mask(cpumask_t mask, int vector) @@ -159,39 +161,146 @@ struct genapic apic_x2apic_uv_x = { .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ }; -static __cpuinit void set_x2apic_extra_bits(int nasid) +static __cpuinit void set_x2apic_extra_bits(int pnode) { - __get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6); + __get_cpu_var(x2apic_extra_bits) = (pnode << 6); } /* * Called on boot cpu. */ +static __init int boot_pnode_to_blade(int pnode) +{ + int blade; + + for (blade = 0; blade < uv_num_possible_blades(); blade++) + if (pnode == uv_blade_info[blade].pnode) + return blade; + BUG(); +} + +struct redir_addr { + unsigned long redirect; + unsigned long alias; +}; + +#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT + +static __initdata struct redir_addr redir_addrs[] = { + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, +}; + +static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) +{ + union uvh_si_alias0_overlay_config_u alias; + union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; + int i; + + for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { + alias.v = uv_read_local_mmr(redir_addrs[i].alias); + if (alias.s.base == 0) { + *size = (1UL << alias.s.m_alias); + redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); + *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; + return; + } + } + BUG(); +} + +static __init void map_low_mmrs(void) +{ + init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); + init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); +} + +enum map_type {map_wb, map_uc}; + +static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) +{ + unsigned long bytes, paddr; + + paddr = base << shift; + bytes = (1UL << shift); + printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, + paddr + bytes); + if (map_type == map_uc) + init_extra_mapping_uc(paddr, bytes); + else + init_extra_mapping_wb(paddr, bytes); + +} +static __init void map_gru_high(int max_pnode) +{ + union uvh_rh_gam_gru_overlay_config_mmr_u gru; + int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; + + gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); + if (gru.s.enable) + map_high("GRU", gru.s.base, shift, map_wb); +} + +static __init void map_config_high(int max_pnode) +{ + union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; + int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; + + cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); + if (cfg.s.enable) + map_high("CONFIG", cfg.s.base, shift, map_uc); +} + +static __init void map_mmr_high(int max_pnode) +{ + union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; + int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; + + mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); + if (mmr.s.enable) + map_high("MMR", mmr.s.base, shift, map_uc); +} + +static __init void map_mmioh_high(int max_pnode) +{ + union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; + int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + + mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); + if (mmioh.s.enable) + map_high("MMIOH", mmioh.s.base, shift, map_uc); +} + static __init void uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; - int bytes, nid, cpu, lcpu, nasid, last_nasid, blade; - unsigned long mmr_base; + union uvh_node_id_u node_id; + unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; + int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; + int max_pnode = 0; + unsigned long mmr_base, present; + + map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); + m_val = m_n_config.s.m_skt; + n_val = m_n_config.s.n_skt; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); - last_nasid = -1; - for_each_possible_cpu(cpu) { - nid = cpu_to_node(cpu); - nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); - if (nasid != last_nasid) - uv_possible_blades++; - last_nasid = nasid; - } + for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) + uv_possible_blades += + hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = alloc_bootmem_pages(bytes); + get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); + bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); uv_node_to_blade = alloc_bootmem_pages(bytes); memset(uv_node_to_blade, 255, bytes); @@ -200,43 +309,62 @@ static __init void uv_system_init(void) uv_cpu_to_blade = alloc_bootmem_pages(bytes); memset(uv_cpu_to_blade, 255, bytes); - last_nasid = -1; - blade = -1; - lcpu = -1; - for_each_possible_cpu(cpu) { - nid = cpu_to_node(cpu); - nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); - if (nasid != last_nasid) { - blade++; - lcpu = -1; - uv_blade_info[blade].nr_posible_cpus = 0; + blade = 0; + for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { + present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); + for (j = 0; j < 64; j++) { + if (!test_bit(j, &present)) + continue; + uv_blade_info[blade].pnode = (i * 64 + j); + uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + blade++; } - last_nasid = nasid; - lcpu++; + } - uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt; - uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt; + node_id.v = uv_read_local_mmr(UVH_NODE_ID); + gnode_upper = (((unsigned long)node_id.s.node_id) & + ~((1 << n_val) - 1)) << m_val; + + for_each_present_cpu(cpu) { + nid = cpu_to_node(cpu); + pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + blade = boot_pnode_to_blade(pnode); + lcpu = uv_blade_info[blade].nr_possible_cpus; + uv_blade_info[blade].nr_possible_cpus++; + + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; + uv_cpu_hub_info(cpu)->lowmem_remap_top = + lowmem_redir_base + lowmem_redir_size; + uv_cpu_hub_info(cpu)->m_val = m_val; + uv_cpu_hub_info(cpu)->n_val = m_val; uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; - uv_cpu_hub_info(cpu)->local_nasid = nasid; - uv_cpu_hub_info(cpu)->gnode_upper = - nasid & ~((1 << uv_hub_info->n_val) - 1); + uv_cpu_hub_info(cpu)->pnode = pnode; + uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1; + uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ - uv_blade_info[blade].nasid = nasid; - uv_blade_info[blade].nr_posible_cpus++; uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; + max_pnode = max(pnode, max_pnode); - printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid); - printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade); + printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " + "lcpu %d, blade %d\n", + cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, + lcpu, blade); } + + map_gru_high(max_pnode); + map_mmr_high(max_pnode); + map_config_high(max_pnode); + map_mmioh_high(max_pnode); } /* * Called on each cpu to initialize the per_cpu UV data area. + * ZZZ hotplug not supported yet */ void __cpuinit uv_cpu_init(void) { @@ -246,5 +374,5 @@ void __cpuinit uv_cpu_init(void) uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; if (get_uv_system_type() == UV_NON_UNIQUE_APIC) - set_x2apic_extra_bits(uv_hub_info->local_nasid); + set_x2apic_extra_bits(uv_hub_info->pnode); } diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c new file mode 100644 index 000000000000..3e66bd364a9d --- /dev/null +++ b/arch/x86/kernel/head.c @@ -0,0 +1,55 @@ +#include <linux/kernel.h> +#include <linux/init.h> + +#include <asm/setup.h> +#include <asm/bios_ebda.h> + +#define BIOS_LOWMEM_KILOBYTES 0x413 + +/* + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of + * conventional memory (int 0x12) too. This also contains a + * workaround for Dell systems that neglect to reserve EBDA. + * The same workaround also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. + */ +void __init reserve_ebda_region(void) +{ + unsigned int lowmem, ebda_addr; + + /* To determine the position of the EBDA and the */ + /* end of conventional memory, we need to look at */ + /* the BIOS data area. In a paravirtual environment */ + /* that area is absent. We'll just have to assume */ + /* that the paravirt case can handle memory setup */ + /* correctly, without our help. */ + if (paravirt_enabled()) + return; + + /* end of low (conventional) memory */ + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); + lowmem <<= 10; + + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. */ + if ((lowmem - ebda_addr) <= 0x10000) + lowmem = ebda_addr; + + /* Fixup: bios does not report an EBDA at all. */ + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) + lowmem = 0x9f000; + + /* Paranoia: should never happen, but... */ + if ((lowmem == 0) || (lowmem >= 0x100000)) + lowmem = 0x9f000; + + /* reserve all memory between lowmem and the 1MB mark */ + reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); +} diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3db059058927..fa1d25dd83e3 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -8,7 +8,34 @@ #include <linux/init.h> #include <linux/start_kernel.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/e820.h> +#include <asm/bios_ebda.h> + void __init i386_start_kernel(void) { + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + +#ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } +#endif + reserve_early(init_pg_tables_start, init_pg_tables_end, + "INIT_PG_TABLE"); + + reserve_ebda_region(); + + /* + * At this point everything still needed from the boot loader + * or BIOS or kernel text should be early reserved or marked not + * RAM in e820. All other memory is free game. + */ + start_kernel(); } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e25c57b8aa84..c97819829146 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -25,6 +25,20 @@ #include <asm/e820.h> #include <asm/bios_ebda.h> +/* boot cpu pda */ +static struct x8664_pda _boot_cpu_pda __read_mostly; + +#ifdef CONFIG_SMP +/* + * We install an empty cpu_pda pointer table to indicate to early users + * (numa_set_node) that the cpu_pda pointer table for cpus other than + * the boot cpu is not yet setup. + */ +static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; +#else +static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; +#endif + static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -51,74 +65,6 @@ static void __init copy_bootdata(char *real_mode_data) } } -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -static void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - reserve_early(lowmem, 0x100000, "BIOS reserved"); -} - -static void __init reserve_setup_data(void) -{ - struct setup_data *data; - unsigned long pa_data; - char buf[32]; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_ioremap(pa_data, sizeof(*data)); - sprintf(buf, "setup data %x", data->type); - reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); - pa_data = data->next; - early_iounmap(data, sizeof(*data)); - } -} - void __init x86_64_start_kernel(char * real_mode_data) { int i; @@ -156,10 +102,17 @@ void __init x86_64_start_kernel(char * real_mode_data) early_printk("Kernel alive\n"); - for (i = 0; i < NR_CPUS; i++) - cpu_pda(i) = &boot_cpu_pda[i]; - + _cpu_pda = __cpu_pda; + cpu_pda(0) = &_boot_cpu_pda; pda_init(0); + + early_printk("Kernel really alive\n"); + + x86_64_start_reservations(real_mode_data); +} + +void __init x86_64_start_reservations(char *real_mode_data) +{ copy_bootdata(__va(real_mode_data)); reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); @@ -175,7 +128,6 @@ void __init x86_64_start_kernel(char * real_mode_data) #endif reserve_ebda_region(); - reserve_setup_data(); /* * At this point everything still needed from the boot loader diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f7357cc0162c..f67e93441caf 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -194,6 +194,7 @@ default_entry: xorl %ebx,%ebx /* %ebx is kept at zero */ movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_pmd), %edx movl $PTE_ATTR, %eax 10: @@ -219,6 +220,8 @@ default_entry: jb 10b 1: movl %edi,pa(init_pg_tables_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax @@ -228,6 +231,7 @@ default_entry: page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_dir), %edx movl $PTE_ATTR, %eax 10: @@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20); cmpl %ebp,%eax jb 10b movl %edi,pa(init_pg_tables_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax @@ -446,10 +452,13 @@ is386: movl $2,%ecx # set MP je 1f movl $(__KERNEL_PERCPU), %eax movl %eax,%fs # set this cpu's percpu - jmp initialize_secondary # all other CPUs call initialize_secondary + movl (stack_start), %esp 1: #endif /* CONFIG_SMP */ - jmp i386_start_kernel + jmp *(initial_code) +.align 4 +ENTRY(initial_code) + .long i386_start_kernel /* * We depend on ET to be correct. This checks for 287/387. diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 10a1955bb1d1..b07ac7b217cb 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -18,6 +18,7 @@ #include <asm/page.h> #include <asm/msr.h> #include <asm/cache.h> +#include <asm/processor-flags.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -31,6 +32,13 @@ * */ +#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) + +L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) +L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) +L4_START_KERNEL = pgd_index(__START_KERNEL_map) +L3_START_KERNEL = pud_index(__START_KERNEL_map) + .text .section .text.head .code64 @@ -76,8 +84,8 @@ startup_64: /* Fixup the physical addresses in the page table */ addq %rbp, init_level4_pgt + 0(%rip) - addq %rbp, init_level4_pgt + (258*8)(%rip) - addq %rbp, init_level4_pgt + (511*8)(%rip) + addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) + addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) addq %rbp, level3_ident_pgt + 0(%rip) @@ -128,7 +136,7 @@ ident_complete: /* Fixup phys_base */ addq %rbp, phys_base(%rip) -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_TRAMPOLINE addq %rbp, trampoline_level4_pgt + 0(%rip) addq %rbp, trampoline_level4_pgt + (511*8)(%rip) #endif @@ -154,9 +162,7 @@ ENTRY(secondary_startup_64) */ /* Enable PAE mode and PGE */ - xorq %rax, %rax - btsq $5, %rax - btsq $7, %rax + movl $(X86_CR4_PAE | X86_CR4_PGE), %eax movq %rax, %cr4 /* Setup early boot stage 4 level pagetables. */ @@ -184,19 +190,15 @@ ENTRY(secondary_startup_64) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ -#define CR0_PM 1 /* protected mode */ -#define CR0_MP (1<<1) -#define CR0_ET (1<<4) -#define CR0_NE (1<<5) -#define CR0_WP (1<<16) -#define CR0_AM (1<<18) -#define CR0_PAGING (1<<31) - movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ + X86_CR0_PG) + movl $CR0_STATE, %eax /* Make changes effective */ movq %rax, %cr0 /* Setup a boot time stack */ - movq init_rsp(%rip),%rsp + movq stack_start(%rip),%rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -208,7 +210,7 @@ ENTRY(secondary_startup_64) * addresses where we're currently running on. We have to do that here * because in 32bit we couldn't load a 64bit linear address. */ - lgdt cpu_gdt_descr(%rip) + lgdt early_gdt_descr(%rip) /* set up data segments. actually 0 would do too */ movl $__KERNEL_DS,%eax @@ -257,8 +259,9 @@ ENTRY(secondary_startup_64) .quad x86_64_start_kernel __FINITDATA - ENTRY(init_rsp) + ENTRY(stack_start) .quad init_thread_union+THREAD_SIZE-8 + .word 0 bad_address: jmp bad_address @@ -327,11 +330,11 @@ early_idt_ripmsg: ENTRY(name) /* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << 21) + (PERM) ; \ - i = i + 1 ; \ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ .endr /* @@ -342,9 +345,9 @@ ENTRY(name) */ NEXT_PAGE(init_level4_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .fill 257,8,0 + .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .fill 252,8,0 + .org init_level4_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE @@ -353,7 +356,7 @@ NEXT_PAGE(level3_ident_pgt) .fill 511,8,0 NEXT_PAGE(level3_kernel_pgt) - .fill 510,8,0 + .fill L3_START_KERNEL,8,0 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE @@ -384,7 +387,7 @@ NEXT_PAGE(level2_kernel_pgt) * If you want to increase this then increase MODULES_VADDR * too.) */ - PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) NEXT_PAGE(level2_spare_pgt) @@ -395,54 +398,16 @@ NEXT_PAGE(level2_spare_pgt) .data .align 16 - .globl cpu_gdt_descr -cpu_gdt_descr: - .word gdt_end-cpu_gdt_table-1 -gdt: - .quad cpu_gdt_table -#ifdef CONFIG_SMP - .rept NR_CPUS-1 - .word 0 - .quad 0 - .endr -#endif + .globl early_gdt_descr +early_gdt_descr: + .word GDT_ENTRIES*8-1 + .quad per_cpu__gdt_page ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ - - .section .data.page_aligned, "aw" - .align PAGE_SIZE - -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -ENTRY(cpu_gdt_table) - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ - .quad 0x00af9b000000ffff /* __KERNEL_CS */ - .quad 0x00cf93000000ffff /* __KERNEL_DS */ - .quad 0x00cffb000000ffff /* __USER32_CS */ - .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ - .quad 0x00affb000000ffff /* __USER_CS */ - .quad 0x0 /* unused */ - .quad 0,0 /* TSS */ - .quad 0,0 /* LDT */ - .quad 0,0,0 /* three TLS descriptors */ - .quad 0x0000f40000000000 /* node/CPU stored in limit */ -gdt_end: - /* asm/segment.h:GDT_ENTRIES must match this */ - /* This should be a multiple of the cache line size */ - /* GDTs of other CPUs are now dynamically allocated */ - - /* zero the remaining page */ - .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 - .section .bss, "aw", @nobits .align L1_CACHE_BYTES ENTRY(idt_table) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9b5cfcdfc426..ea230ec69057 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -17,7 +17,7 @@ /* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000 +#define FSEC_PER_NSEC 1000000L /* * HPET address is set in acpi/boot.c, when an ACPI entry exists @@ -206,20 +206,19 @@ static void hpet_enable_legacy_int(void) static void hpet_legacy_clockevent_register(void) { - uint64_t hpet_freq; - /* Start HPET legacy interrupts */ hpet_enable_legacy_int(); /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. + * The mult factor is defined as (include/linux/clockchips.h) + * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h) + * hpet_period is in units of femtoseconds (per cycle), so + * mult/2^shift = cyc/ns = 10^6/hpet_period + * mult = (10^6 * 2^shift)/hpet_period + * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period */ - hpet_freq = 1000000000000000ULL; - do_div(hpet_freq, hpet_period); - hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, hpet_clockevent.shift); + hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC, + hpet_period, hpet_clockevent.shift); /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); @@ -324,7 +323,7 @@ static struct clocksource clocksource_hpet = { static int hpet_clocksource_register(void) { - u64 tmp, start, now; + u64 start, now; cycle_t t1; /* Start the counter */ @@ -351,21 +350,15 @@ static int hpet_clocksource_register(void) return -ENODEV; } - /* Initialize and register HPET clocksource - * - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * approximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult + /* + * The definition of mult is (include/linux/clocksource.h) + * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc + * so we first need to convert hpet_period to ns/cyc units: + * mult/2^shift = ns/cyc = hpet_period/10^6 + * mult = (hpet_period * 2^shift)/10^6 + * mult = (hpet_period << shift)/FSEC_PER_NSEC */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; + clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); clocksource_register(&clocksource_hpet); diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259.c index fe631967d625..dc92b49d9204 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259.c @@ -1,8 +1,10 @@ +#include <linux/linkage.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/ioport.h> #include <linux/interrupt.h> +#include <linux/timex.h> #include <linux/slab.h> #include <linux/random.h> #include <linux/init.h> @@ -10,10 +12,12 @@ #include <linux/sysdev.h> #include <linux/bitops.h> +#include <asm/acpi.h> #include <asm/atomic.h> #include <asm/system.h> #include <asm/io.h> #include <asm/timer.h> +#include <asm/hw_irq.h> #include <asm/pgtable.h> #include <asm/delay.h> #include <asm/desc.h> @@ -32,7 +36,7 @@ static int i8259A_auto_eoi; DEFINE_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); -static struct irq_chip i8259A_chip = { +struct irq_chip i8259A_chip = { .name = "XT-PIC", .mask = disable_8259A_irq, .disable = disable_8259A_irq, @@ -125,14 +129,14 @@ static inline int i8259A_irq_real(unsigned int irq) int irqmask = 1<<irq; if (irq < 8) { - outb(0x0B,PIC_MASTER_CMD); /* ISR register */ + outb(0x0B, PIC_MASTER_CMD); /* ISR register */ value = inb(PIC_MASTER_CMD) & irqmask; - outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ + outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */ return value; } - outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ + outb(0x0B, PIC_SLAVE_CMD); /* ISR register */ value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ + outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */ return value; } @@ -171,12 +175,14 @@ handle_real_irq: if (irq & 8) { inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7), PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD); } else { inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ + outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ } spin_unlock_irqrestore(&i8259A_lock, flags); return; @@ -199,7 +205,8 @@ spurious_8259A_irq: * lets ACK and report it. [once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } atomic_inc(&irq_err_count); @@ -290,17 +297,28 @@ void init_8259A(int auto_eoi) * outb_pic - this has to work on a wide range of PC hardware. */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ + + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, + to 0x20-0x27 on i386 */ + outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + + /* 8259A-1 (the master) has a slave on IR2 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); + if (auto_eoi) /* master does Auto EOI */ outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); else /* master expects normal EOI */ outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ + + /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ + outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + if (auto_eoi) /* * In AEOI mode we just have to mask the interrupt @@ -317,93 +335,3 @@ void init_8259A(int auto_eoi) spin_unlock_irqrestore(&i8259A_lock, flags); } - -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. - */ - - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - extern void math_error(void __user *); - outb(0,0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error((void __user *)get_irq_regs()->ip); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { - .handler = math_error_irq, - .mask = CPU_MASK_NONE, - .name = "fpu", -}; - -void __init init_ISA_irqs (void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); -#endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < 16; i++) { - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) - break; - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (!test_bit(vector, used_vectors)) - set_intr_gate(vector, interrupt[i]); - } - - /* setup after call gates are initialised (usually add in - * the architecture specific gates) - */ - intr_init_hook(); - - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. - */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c deleted file mode 100644 index fa57a1568508..000000000000 --- a/arch/x86/kernel/i8259_64.c +++ /dev/null @@ -1,512 +0,0 @@ -#include <linux/linkage.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/sysdev.h> -#include <linux/bitops.h> - -#include <asm/acpi.h> -#include <asm/atomic.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/hw_irq.h> -#include <asm/pgtable.h> -#include <asm/delay.h> -#include <asm/desc.h> -#include <asm/apic.h> -#include <asm/i8259.h> - -/* - * Common place to define all x86 IRQ vectors - * - * This builds up the IRQ handler stubs using some ugly macros in irq.h - * - * These macros create the low-level assembly IRQ routines that save - * register context and call do_IRQ(). do_IRQ() then does all the - * operations that are needed to keep the AT (or SMP IOAPIC) - * interrupt-controller happy. - */ - -#define BI(x,y) \ - BUILD_IRQ(x##y) - -#define BUILD_16_IRQS(x) \ - BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ - BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ - BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ - BI(x,c) BI(x,d) BI(x,e) BI(x,f) - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) -BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) -BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) -BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) - -#undef BUILD_16_IRQS -#undef BI - - -#define IRQ(x,y) \ - IRQ##x##y##_interrupt - -#define IRQLIST_16(x) \ - IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ - IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ - IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ - IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) - -/* for the irq vectors */ -static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { - IRQLIST_16(0x2), IRQLIST_16(0x3), - IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), - IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), - IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) -}; - -#undef IRQ -#undef IRQLIST_16 - -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<<irq; - unsigned long flags; - int ret; - - spin_lock_irqsave(&i8259A_lock, flags); - if (irq < 8) - ret = inb(PIC_MASTER_CMD) & mask; - else - ret = inb(PIC_SLAVE_CMD) & (mask >> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<<irq); - set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, - "XT"); - enable_irq(irq); -} - -/* - * This function assumes to be called rarely. Switching between - * 8259A registers is slow. - * This has to be protected by the irq controller spinlock - * before being called. - */ -static inline int i8259A_irq_real(unsigned int irq) -{ - int value; - int irqmask = 1<<irq; - - if (irq < 8) { - outb(0x0B,PIC_MASTER_CMD); /* ISR register */ - value = inb(PIC_MASTER_CMD) & irqmask; - outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ - return value; - } - outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ - value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - /* 'Specific EOI' to slave */ - outb(0x60+(irq&7),PIC_SLAVE_CMD); - /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - /* 'Specific EOI' to master */ - outb(0x60+irq,PIC_MASTER_CMD); - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG - "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); - /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(0x04, PIC_MASTER_IMR); - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); - /* 8259A-2 is a slave on master's IR2 */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); - /* (slave's support for AEOI in flat mode is to be investigated) */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); - - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - - - - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ - -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 -}; - -void __init init_ISA_irqs (void) -{ - int i; - - init_bsp_APIC(); - init_8259A(0); - - for (i = 0; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; - - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - irq_desc[i].chip = &no_irq_chip; - } - } -} - -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - init_ISA_irqs(); - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - -#ifdef CONFIG_SMP - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); -#endif - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); - - /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - if (!acpi_ioapic) - setup_irq(2, &irq2); -} diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 4dc8600d9d20..6b220b9dcbb3 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -25,6 +25,7 @@ #include <linux/init.h> #include <linux/delay.h> #include <linux/sched.h> +#include <linux/bootmem.h> #include <linux/mc146818rtc.h> #include <linux/compiler.h> #include <linux/acpi.h> @@ -58,7 +59,14 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); -int timer_over_8254 __initdata = 1; +static bool mask_ioapic_irq_2 __initdata; + +void __init force_mask_ioapic_irq_2(void) +{ + mask_ioapic_irq_2 = true; +} + +int timer_through_8259 __initdata; /* * Is the SiS APIC rmw bug present ? @@ -72,15 +80,21 @@ int sis_apic_bug = -1; int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + static int disable_timer_pin_1 __initdata; /* @@ -110,7 +124,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -239,7 +253,7 @@ static void __init replace_pin_at_irq(unsigned int irq, } } -static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_pin_list *entry = irq_2_pin + irq; unsigned int pin, reg; @@ -259,30 +273,32 @@ static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsign } /* mask = 1 */ -static void __mask_IO_APIC_irq (unsigned int irq) +static void __mask_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00010000, 0); + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); } /* mask = 0 */ -static void __unmask_IO_APIC_irq (unsigned int irq) +static void __unmask_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0, 0x00010000); + __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); } /* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER); } /* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED); } -static void mask_IO_APIC_irq (unsigned int irq) +static void mask_IO_APIC_irq(unsigned int irq) { unsigned long flags; @@ -291,7 +307,7 @@ static void mask_IO_APIC_irq (unsigned int irq) spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq (unsigned int irq) +static void unmask_IO_APIC_irq(unsigned int irq) { unsigned long flags; @@ -303,7 +319,7 @@ static void unmask_IO_APIC_irq (unsigned int irq) static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; - + /* Check delivery_mode to be sure we're not clearing an SMI pin */ entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) @@ -315,7 +331,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ioapic_mask_entry(apic, pin); } -static void clear_IO_APIC (void) +static void clear_IO_APIC(void) { int apic, pin; @@ -332,7 +348,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; cpumask_t tmp; - + cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) tmp = TARGET_CPUS; @@ -361,7 +377,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # include <linux/kernel_stat.h> /* kstat */ # include <linux/slab.h> /* kmalloc() */ # include <linux/timer.h> - + #define IRQBALANCE_CHECK_ARCH -999 #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -373,14 +389,14 @@ static int physical_balance __read_mostly; static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; static struct irq_cpu_info { - unsigned long * last_irq; - unsigned long * irq_delta; + unsigned long *last_irq; + unsigned long *irq_delta; unsigned long irq; } irq_cpu_data[NR_CPUS]; #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) +#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) +#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) #define IDLE_ENOUGH(cpu,now) \ (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) @@ -419,8 +435,8 @@ inside: if (cpu == -1) cpu = NR_CPUS-1; } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu,now))); + } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || + (search_idle && !IDLE_ENOUGH(cpu, now))); return cpu; } @@ -430,15 +446,14 @@ static inline void balance_irq(int cpu, int irq) unsigned long now = jiffies; cpumask_t allowed_mask; unsigned int new_cpu; - + if (irqbalance_disabled) - return; + return; cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) { + if (cpu != new_cpu) set_pending_irq(irq, cpumask_of_cpu(new_cpu)); - } } static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) @@ -450,14 +465,14 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) if (!irq_desc[j].action) continue; /* Is it a significant load ? */ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < + if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < useful_load_threshold) continue; balance_irq(i, j); } } balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); return; } @@ -486,22 +501,22 @@ static void do_irq_balance(void) /* Is this an active IRQ or balancing disabled ? */ if (!irq_desc[j].action || irq_balancing_disabled(j)) continue; - if ( package_index == i ) - IRQ_DELTA(package_index,j) = 0; + if (package_index == i) + IRQ_DELTA(package_index, j) = 0; /* Determine the total count per processor per IRQ */ value_now = (unsigned long) kstat_cpu(i).irqs[j]; /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i,j); + delta = value_now - LAST_CPU_IRQ(i, j); /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i,j) = value_now; + LAST_CPU_IRQ(i, j) = value_now; /* Ignore IRQs whose rate is less than the clock */ if (delta < useful_load_threshold) continue; /* update the load for the processor or package total */ - IRQ_DELTA(package_index,j) += delta; + IRQ_DELTA(package_index, j) += delta; /* Keep track of the higher numbered sibling as well */ if (i != package_index) @@ -527,7 +542,8 @@ static void do_irq_balance(void) max_cpu_irq = ULONG_MAX; tryanothercpu: - /* Look for heaviest loaded processor. + /* + * Look for heaviest loaded processor. * We may come back to get the next heaviest loaded processor. * Skip processors with trivial loads. */ @@ -536,7 +552,7 @@ tryanothercpu: for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; - if (max_cpu_irq <= CPU_IRQ(i)) + if (max_cpu_irq <= CPU_IRQ(i)) continue; if (tmp_cpu_irq < CPU_IRQ(i)) { tmp_cpu_irq = CPU_IRQ(i); @@ -545,8 +561,9 @@ tryanothercpu: } if (tmp_loaded == -1) { - /* In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original + /* + * In the case of small number of heavy interrupt sources, + * loading some of the cpus too much. We use Ingo's original * approach to rotate them around. */ if (!first_attempt && imbalance >= useful_load_threshold) { @@ -555,13 +572,14 @@ tryanothercpu: } goto not_worth_the_effort; } - + first_attempt = 0; /* heaviest search */ max_cpu_irq = tmp_cpu_irq; /* load */ max_loaded = tmp_loaded; /* processor */ imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - /* if imbalance is less than approx 10% of max load, then + + /* + * if imbalance is less than approx 10% of max load, then * observe diminishing returns action. - quit */ if (imbalance < (max_cpu_irq >> 3)) @@ -577,26 +595,25 @@ tryanotherirq: /* Is this an active IRQ? */ if (!irq_desc[j].action) continue; - if (imbalance <= IRQ_DELTA(max_loaded,j)) + if (imbalance <= IRQ_DELTA(max_loaded, j)) continue; /* Try to find the IRQ that is closest to the imbalance * without going over. */ - if (move_this_load < IRQ_DELTA(max_loaded,j)) { - move_this_load = IRQ_DELTA(max_loaded,j); + if (move_this_load < IRQ_DELTA(max_loaded, j)) { + move_this_load = IRQ_DELTA(max_loaded, j); selected_irq = j; } } - if (selected_irq == -1) { + if (selected_irq == -1) goto tryanothercpu; - } imbalance = move_this_load; - + /* For physical_balance case, we accumulated both load * values in the one of the siblings cpu_irq[], * to use the same code for physical and logical processors - * as much as possible. + * as much as possible. * * NOTE: the cpu_irq[] array holds the sum of the load for * sibling A and sibling B in the slot for the lowest numbered @@ -625,11 +642,11 @@ tryanotherirq: /* mark for change destination */ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); - /* Since we made a change, come back sooner to + /* Since we made a change, come back sooner to * check for more variation. */ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); return; } goto tryanotherirq; @@ -640,7 +657,7 @@ not_worth_the_effort: * upward */ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); + balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); return; } @@ -679,13 +696,13 @@ static int __init balanced_irq_init(void) cpumask_t tmp; cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; + c = &boot_cpu_data; /* When not overwritten by the command line ask subarchitecture. */ if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) irqbalance_disabled = NO_BALANCE_IRQ; if (irqbalance_disabled) return 0; - + /* disable irqbalance completely if there is only one processor online */ if (num_online_cpus() < 2) { irqbalance_disabled = 1; @@ -699,16 +716,14 @@ static int __init balanced_irq_init(void) physical_balance = 1; for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { printk(KERN_ERR "balanced_irq_init: out of memory"); goto failed; } - memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); - memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); } - + printk(KERN_INFO "Starting balanced_irq\n"); if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) return 0; @@ -801,10 +816,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -818,13 +833,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -834,17 +849,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + for (apic = 0; apic < nr_ioapics; apic++) { + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -864,28 +879,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " "slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -900,7 +915,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); /* - * This function currently is only a helper for the i386 smp boot process where + * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ @@ -952,7 +967,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -969,118 +984,115 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { + case 0: /* conforms, ie. bus-type dependent polarity */ { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - polarity = test_bit(bus, mp_bus_not_pci)? - default_ISA_polarity(idx): - default_PCI_polarity(idx); - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } + polarity = test_bit(bus, mp_bus_not_pci)? + default_ISA_polarity(idx): + default_PCI_polarity(idx); + break; + } + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } } return polarity; } static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { + case 0: /* conforms, ie. bus-type dependent */ { - case 0: /* conforms, ie. bus-type dependent */ - { - trigger = test_bit(bus, mp_bus_not_pci)? - default_ISA_trigger(idx): - default_PCI_trigger(idx); + trigger = test_bit(bus, mp_bus_not_pci)? + default_ISA_trigger(idx): + default_PCI_trigger(idx); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ break; } - case 1: /* edge */ + case MP_BUS_EISA: /* EISA pin */ { - trigger = 0; + trigger = default_EISA_trigger(idx); break; } - case 2: /* reserved */ + case MP_BUS_PCI: /* PCI pin */ { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; + /* set before the switch */ break; } - case 3: /* level */ + case MP_BUS_MCA: /* MCA pin */ { - trigger = 1; + trigger = default_MCA_trigger(idx); break; } - default: /* invalid */ + default: { printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; + trigger = 1; break; } } +#endif + break; + } + case 1: /* edge */ + { + trigger = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + case 3: /* level */ + { + trigger = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 0; + break; + } + } return trigger; } @@ -1097,16 +1109,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; else { /* * PCI IRQs are mapped in order @@ -1148,8 +1160,8 @@ static inline int IO_APIC_irq_trigger(int irq) for (apic = 0; apic < nr_ioapics; apic++) { for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic,pin,mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) return irq_trigger(idx); } } @@ -1164,7 +1176,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 } static int __assign_irq_vector(int irq) { - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + static int current_vector = FIRST_DEVICE_VECTOR, current_offset; int vector, offset; BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); @@ -1176,7 +1188,7 @@ static int __assign_irq_vector(int irq) offset = current_offset; next: vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { + if (vector >= first_system_vector) { offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } @@ -1203,6 +1215,11 @@ static int assign_irq_vector(int irq) return vector; } + +void setup_vector_irq(int cpu) +{ +} + static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 @@ -1237,25 +1254,25 @@ static void __init setup_IO_APIC_irqs(void) /* * add it to the IO-APIC irq-routing table: */ - memset(&entry,0,sizeof(entry)); + memset(&entry, 0, sizeof(entry)); entry.delivery_mode = INT_DELIVERY_MODE; entry.dest_mode = INT_DEST_MODE; entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - idx = find_irq_entry(apic,pin,mp_INT); + idx = find_irq_entry(apic, pin, mp_INT); if (idx == -1) { if (first_notcon) { apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mpc_apicid, + mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mpc_apicid, pin); + mp_ioapics[apic].mp_apicid, pin); continue; } @@ -1289,7 +1306,7 @@ static void __init setup_IO_APIC_irqs(void) vector = assign_irq_vector(irq); entry.vector = vector; ioapic_register_intr(irq, vector, IOAPIC_AUTO); - + if (!apic && (irq < 16)) disable_8259A_irq(irq); } @@ -1302,25 +1319,21 @@ static void __init setup_IO_APIC_irqs(void) } /* - * Set up the 8259A-master output pin: + * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, + int vector) { struct IO_APIC_route_entry entry; - memset(&entry,0,sizeof(entry)); - - disable_8259A_irq(0); - - /* mask LVT0 */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + memset(&entry, 0, sizeof(entry)); /* * We use logical delivery to get the timer IRQ * to the first CPU. */ entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* unmask IRQ now */ + entry.mask = 1; /* mask IRQ now */ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; @@ -1329,17 +1342,14 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in /* * The timer IRQ doesn't have to know that behind the - * scene we have a 8259A-master in AEOI mode ... + * scene we may have a 8259A-master in AEOI mode ... */ - irq_desc[0].chip = &ioapic_chip; - set_irq_handler(0, handle_edge_irq); + ioapic_register_intr(0, vector, IOAPIC_EDGE); /* * Add it to the IO-APIC irq-routing table: */ ioapic_write_entry(apic, pin, entry); - - enable_8259A_irq(0); } void __init print_IO_APIC(void) @@ -1354,10 +1364,10 @@ void __init print_IO_APIC(void) if (apic_verbosity == APIC_QUIET) return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1376,7 +1386,7 @@ void __init print_IO_APIC(void) reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -1459,7 +1469,7 @@ void __init print_IO_APIC(void) #if 0 -static void print_APIC_bitfield (int base) +static void print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1480,7 +1490,7 @@ static void print_APIC_bitfield (int base) } } -void /*__init*/ print_local_APIC(void * dummy) +void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1489,6 +1499,7 @@ void /*__init*/ print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); + v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); @@ -1563,7 +1574,7 @@ void /*__init*/ print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +void print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1, 1); } @@ -1586,11 +1597,11 @@ void /*__init*/ print_PIC(void) v = inb(0xa0) << 8 | inb(0x20); printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - outb(0x0b,0xa0); - outb(0x0b,0x20); + outb(0x0b, 0xa0); + outb(0x0b, 0x20); v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); + outb(0x0a, 0xa0); + outb(0x0a, 0x20); spin_unlock_irqrestore(&i8259A_lock, flags); @@ -1626,7 +1637,7 @@ static void __init enable_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } - for(apic = 0; apic < nr_ioapics; apic++) { + for (apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { @@ -1716,7 +1727,6 @@ void disable_IO_APIC(void) * by Matt Domsch <[email protected]> Tue Dec 21 12:25:05 CST 1999 */ -#ifndef CONFIG_X86_NUMAQ static void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; @@ -1726,6 +1736,11 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; +#ifdef CONFIG_X86_NUMAQ + if (found_numaq) + return; +#endif + /* * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. @@ -1748,15 +1763,15 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mpc_apicid; - if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { + old_id = mp_ioapics[apic].mp_apicid; + + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; } /* @@ -1765,9 +1780,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mpc_apicid)) { + mp_ioapics[apic].mp_apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -1776,13 +1791,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mpc_apicid = i; + mp_ioapics[apic].mp_apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -1791,21 +1806,21 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mpc_apicid) + if (old_id != mp_ioapics[apic].mp_apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_dstapic == old_id) - mp_irqs[i].mpc_dstapic - = mp_ioapics[apic].mpc_apicid; + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic + = mp_ioapics[apic].mp_apicid; /* * Read the right value from the MPC table and * write it into the ID register. - */ + */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -1816,15 +1831,12 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); } } -#else -static void __init setup_ioapic_ids_from_mpc(void) { } -#endif int no_timer_check __initdata; @@ -2020,7 +2032,7 @@ static void ack_apic(unsigned int irq) ack_APIC_irq(); } -static void mask_lapic_irq (unsigned int irq) +static void mask_lapic_irq(unsigned int irq) { unsigned long v; @@ -2028,7 +2040,7 @@ static void mask_lapic_irq (unsigned int irq) apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); } -static void unmask_lapic_irq (unsigned int irq) +static void unmask_lapic_irq(unsigned int irq) { unsigned long v; @@ -2037,7 +2049,7 @@ static void unmask_lapic_irq (unsigned int irq) } static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC-edge", + .name = "local-APIC", .mask = mask_lapic_irq, .unmask = unmask_lapic_irq, .eoi = ack_apic, @@ -2046,14 +2058,14 @@ static struct irq_chip lapic_chip __read_mostly = { static void __init setup_nmi(void) { /* - * Dirty trick to enable the NMI watchdog ... + * Dirty trick to enable the NMI watchdog ... * We put the 8259A master into AEOI mode and * unmask on all local APICs LVT0 as NMI. * * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') * is from Maciej W. Rozycki - so we do not have to EOI from * the NMI handler or the timer interrupt. - */ + */ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); enable_NMI_through_LVT0(); @@ -2129,11 +2141,16 @@ static inline void __init unlock_ExtINT_logic(void) static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; + int no_pin1 = 0; int vector; + unsigned int ver; unsigned long flags; local_irq_save(flags); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + /* * get/set the timer IRQ vector: */ @@ -2142,17 +2159,17 @@ static inline void __init check_timer(void) set_intr_gate(vector, interrupt[0]); /* - * Subtle, code in do_timer_interrupt() expects an AEOI - * mode for the 8259A whenever interrupts are routed - * through I/O APICs. Also IRQ0 has to be enabled in - * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * As IRQ0 is to be enabled in the 8259A, the virtual + * wire has to be disabled in the local APIC. Also + * timer interrupts need to be acknowledged manually in + * the 8259A for the i82489DX when using the NMI + * watchdog as that APIC treats NMIs as level-triggered. + * The AEOI mode will finish them in the 8259A + * automatically. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; - if (timer_over_8254 > 0) - enable_8259A_irq(0); + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2162,14 +2179,36 @@ static inline void __init check_timer(void) printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); + if (mask_ioapic_irq_2) + mask_IO_APIC_irq(2); + + /* + * Some BIOS writers are clueless and report the ExtINTA + * I/O APIC input from the cascaded 8259A as the timer + * interrupt input. So just in case, if only one pin + * was found above, try it both directly and through the + * 8259A. + */ + if (pin1 == -1) { + pin1 = pin2; + apic1 = apic2; + no_pin1 = 1; + } else if (pin2 == -1) { + pin2 = pin1; + apic2 = apic1; + } + if (pin1 != -1) { /* * Ok, does IRQ0 through the IOAPIC work? */ + if (no_pin1) { + add_pin_to_irq(0, apic1, pin1); + setup_timer_IRQ0_pin(apic1, pin1, vector); + } unmask_IO_APIC_irq(0); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); } @@ -2178,43 +2217,46 @@ static inline void __init check_timer(void) goto out; } clear_IO_APIC_pin(apic1, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " - "IO-APIC\n"); - } + if (!no_pin1) + printk(KERN_ERR "..MP-BIOS bug: " + "8254 timer not connected to IO-APIC\n"); - printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); - if (pin2 != -1) { + printk(KERN_INFO "...trying to set up timer (IRQ0) " + "through the 8259A ... "); printk("\n..... (found pin %d) ...", pin2); /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(apic2, pin2, vector); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, vector); + unmask_IO_APIC_irq(0); + enable_8259A_irq(0); if (timer_irq_works()) { printk("works.\n"); - if (pin1 != -1) - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - else - add_pin_to_irq(0, apic2, pin2); + timer_through_8259 = 1; if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); setup_nmi(); + enable_8259A_irq(0); } goto out; } /* * Cleanup, just in case ... */ + disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); + printk(" failed.\n"); } - printk(" failed.\n"); if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = 0; + nmi_watchdog = NMI_NONE; } + timer_ack = 0; printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); - disable_8259A_irq(0); set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, "fasteoi"); apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ @@ -2224,12 +2266,12 @@ static inline void __init check_timer(void) printk(" works.\n"); goto out; } + disable_8259A_irq(0); apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); printk(" failed.\n"); printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); - timer_ack = 0; init_8259A(0); make_8259A_irq(0); apic_write_around(APIC_LVT0, APIC_DM_EXTINT); @@ -2261,7 +2303,7 @@ void __init setup_IO_APIC(void) int i; /* Reserve all the system vectors. */ - for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + for (i = first_system_vector; i < NR_VECTORS; i++) set_bit(i, used_vectors); enable_IO_APIC(); @@ -2286,28 +2328,14 @@ void __init setup_IO_APIC(void) print_IO_APIC(); } -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path */ - + static int __init io_apic_bug_finalize(void) { - if(sis_apic_bug == -1) + if (sis_apic_bug == -1) sis_apic_bug = 0; return 0; } @@ -2318,17 +2346,17 @@ struct sysfs_ioapic_data { struct sys_device dev; struct IO_APIC_route_entry entry[0]; }; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; +static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { struct IO_APIC_route_entry *entry; struct sysfs_ioapic_data *data; int i; - + data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) entry[i] = ioapic_read_entry(dev->id, i); return 0; @@ -2341,18 +2369,18 @@ static int ioapic_resume(struct sys_device *dev) unsigned long flags; union IO_APIC_reg_00 reg_00; int i; - + data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ioapic_write_entry(dev->id, i, entry[i]); return 0; @@ -2366,24 +2394,23 @@ static struct sysdev_class ioapic_sysdev_class = { static int __init ioapic_init_sysfs(void) { - struct sys_device * dev; + struct sys_device *dev; int i, size, error = 0; error = sysdev_class_register(&ioapic_sysdev_class); if (error) return error; - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] + for (i = 0; i < nr_ioapics; i++) { + size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); if (!mp_ioapic_data[i]) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); dev = &mp_ioapic_data[i]->dev; - dev->id = i; + dev->id = i; dev->cls = &ioapic_sysdev_class; error = sysdev_register(dev); if (error) { @@ -2458,7 +2485,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_lo = MSI_ADDR_BASE_LO | ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: +MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | ((INT_DELIVERY_MODE != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: @@ -2469,7 +2496,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: +MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(vector); } @@ -2640,12 +2667,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) #endif /* CONFIG_HT_IRQ */ /* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration + ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI -int __init io_apic_get_unique_id (int ioapic, int apic_id) +int __init io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; @@ -2654,10 +2681,10 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) int i = 0; /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only * supports up to 16 on one shared APIC bus. - * + * * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full * advantage of new APIC bus architecture. */ @@ -2676,7 +2703,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) } /* - * Every APIC in a system must have a unique ID or we get lots of nice + * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(apic_id_map, apic_id)) { @@ -2693,7 +2720,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) "trying %d\n", ioapic, apic_id, i); apic_id = i; - } + } tmp = apicid_to_cpu_present(apic_id); physids_or(apic_id_map, apic_id_map, tmp); @@ -2720,7 +2747,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) } -int __init io_apic_get_version (int ioapic) +int __init io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -2733,7 +2760,7 @@ int __init io_apic_get_version (int ioapic) } -int __init io_apic_get_redir_entries (int ioapic) +int __init io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -2746,7 +2773,7 @@ int __init io_apic_get_redir_entries (int ioapic) } -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) { struct IO_APIC_route_entry entry; @@ -2762,7 +2789,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a * corresponding device driver registers for this IRQ. */ - memset(&entry,0,sizeof(entry)); + memset(&entry, 0, sizeof(entry)); entry.delivery_mode = INT_DELIVERY_MODE; entry.dest_mode = INT_DEST_MODE; @@ -2781,7 +2808,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, edge_level, active_high_low); ioapic_register_intr(irq, entry.vector, edge_level); @@ -2802,8 +2829,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; @@ -2836,3 +2863,34 @@ static int __init parse_noapic(char *arg) return 0; } early_param("noapic", parse_noapic); + +void __init ioapic_init_mappings(void) +{ + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; + int i; + + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].mp_apicaddr; + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } + } else { +fake_ioapic_page: + ioapic_phys = (unsigned long) + alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = __pa(ioapic_phys); + } + set_fixmap_nocache(idx, ioapic_phys); + printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + } +} + diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8dfcc529..0494cdb270c5 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -61,7 +61,7 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { +static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, @@ -82,6 +82,10 @@ struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { static int assign_irq_vector(int irq, cpumask_t mask); +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + #define __apicdebuginit __init int sis_apic_bug; /* not actually supported, dummy for compile */ @@ -90,7 +94,14 @@ static int no_timer_check; static int disable_timer_pin_1 __initdata; -int timer_over_8254 __initdata = 1; +static bool mask_ioapic_irq_2 __initdata; + +void __init force_mask_ioapic_irq_2(void) +{ + mask_ioapic_irq_2 = true; +} + +int timer_through_8259 __initdata; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -104,15 +115,17 @@ DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -140,7 +153,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -183,7 +196,7 @@ static bool io_apic_level_ack_pending(unsigned int irq) break; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ - if ((reg >> 14) & 1) { + if (reg & IO_APIC_REDIR_REMOTE_IRR) { spin_unlock_irqrestore(&ioapic_lock, flags); return true; } @@ -298,7 +311,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) break; io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~0x000000ff; + reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; io_apic_modify(apic, reg); if (!entry->next) @@ -360,16 +373,37 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } +/* + * Reroute an IRQ to a different pin. + */ +static void __init replace_pin_at_irq(unsigned int irq, + int oldapic, int oldpin, + int newapic, int newpin) +{ + struct irq_pin_list *entry = irq_2_pin + irq; + + while (1) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; + } + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } +} + #define DO_ACTION(name,R,ACTION, FINAL) \ \ static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + +/* mask = 0 */ +DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) static void mask_IO_APIC_irq (unsigned int irq) { @@ -430,20 +464,6 @@ static int __init disable_timer_pin_setup(char *arg) } __setup("disable_timer_pin_1", disable_timer_pin_setup); -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - /* * Find the IRQ entry number of a certain pin. @@ -453,10 +473,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -470,13 +490,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -486,17 +506,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -516,28 +536,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -565,13 +585,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -607,13 +627,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -660,16 +680,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -730,7 +750,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) offset = current_offset; next: vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { + if (vector >= first_system_vector) { /* If we run out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; @@ -788,7 +808,7 @@ static void __clear_irq_vector(int irq) cpus_clear(cfg->domain); } -void __setup_vector_irq(int cpu) +static void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ @@ -811,6 +831,13 @@ void __setup_vector_irq(int cpu) } } +void setup_vector_irq(int cpu) +{ + spin_lock(&vector_lock); + __setup_vector_irq(smp_processor_id()); + spin_unlock(&vector_lock); +} + static struct irq_chip ioapic_chip; @@ -846,7 +873,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); /* @@ -887,10 +914,10 @@ static void __init setup_IO_APIC_irqs(void) idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); continue; } if (!first_notcon) { @@ -911,26 +938,21 @@ static void __init setup_IO_APIC_irqs(void) } /* - * Set up the 8259A-master output pin as broadcast to all - * CPUs. + * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, + int vector) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); - disable_8259A_irq(0); - - /* mask LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - /* * We use logical delivery to get the timer IRQ * to the first CPU. */ entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* unmask IRQ now */ + entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; @@ -939,7 +961,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in /* * The timer IRQ doesn't have to know that behind the - * scene we have a 8259A-master in AEOI mode ... + * scene we may have a 8259A-master in AEOI mode ... */ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); @@ -947,8 +969,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in * Add it to the IO-APIC irq-routing table: */ ioapic_write_entry(apic, pin, entry); - - enable_8259A_irq(0); } void __apicdebuginit print_IO_APIC(void) @@ -965,7 +985,7 @@ void __apicdebuginit print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -983,7 +1003,7 @@ void __apicdebuginit print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); @@ -1077,6 +1097,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); + v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); @@ -1659,6 +1680,7 @@ static inline void __init check_timer(void) struct irq_cfg *cfg = irq_cfg + 0; int apic1, pin1, apic2, pin2; unsigned long flags; + int no_pin1 = 0; local_irq_save(flags); @@ -1669,16 +1691,11 @@ static inline void __init check_timer(void) assign_irq_vector(0, TARGET_CPUS); /* - * Subtle, code in do_timer_interrupt() expects an AEOI - * mode for the 8259A whenever interrupts are routed - * through I/O APICs. Also IRQ0 has to be enabled in - * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * As IRQ0 is to be enabled in the 8259A, the virtual + * wire has to be disabled in the local APIC. */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - if (timer_over_8254 > 0) - enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -1688,15 +1705,41 @@ static inline void __init check_timer(void) apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", cfg->vector, apic1, pin1, apic2, pin2); + if (mask_ioapic_irq_2) + mask_IO_APIC_irq(2); + + /* + * Some BIOS writers are clueless and report the ExtINTA + * I/O APIC input from the cascaded 8259A as the timer + * interrupt input. So just in case, if only one pin + * was found above, try it both directly and through the + * 8259A. + */ + if (pin1 == -1) { + pin1 = pin2; + apic1 = apic2; + no_pin1 = 1; + } else if (pin2 == -1) { + pin2 = pin1; + apic2 = apic1; + } + + replace_pin_at_irq(0, 0, 0, apic1, pin1); + apic1 = 0; + pin1 = 0; + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + if (pin1 != -1) { /* * Ok, does IRQ0 through the IOAPIC work? */ + if (no_pin1) { + add_pin_to_irq(0, apic1, pin1); + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } unmask_IO_APIC_irq(0); if (!no_timer_check && timer_irq_works()) { - nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); } @@ -1705,42 +1748,47 @@ static inline void __init check_timer(void) goto out; } clear_IO_APIC_pin(apic1, pin1); - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " - "connected to IO-APIC\n"); - } + if (!no_pin1) + apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " + "8254 timer not connected to IO-APIC\n"); - apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " - "through the 8259A ... "); - if (pin2 != -1) { + apic_printk(APIC_VERBOSE,KERN_INFO + "...trying to set up timer (IRQ0) " + "through the 8259A ... "); apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", apic2, pin2); /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); + unmask_IO_APIC_irq(0); + enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); - nmi_watchdog_default(); + timer_through_8259 = 1; if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); setup_nmi(); + enable_8259A_irq(0); } goto out; } /* * Cleanup, just in case ... */ + disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); + apic_printk(APIC_VERBOSE," failed.\n"); } - apic_printk(APIC_VERBOSE," failed.\n"); if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = 0; + nmi_watchdog = NMI_NONE; } apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); - disable_8259A_irq(0); irq_desc[0].chip = &lapic_irq_type; apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@ -1749,6 +1797,7 @@ static inline void __init check_timer(void) apic_printk(APIC_VERBOSE," works.\n"); goto out; } + disable_8259A_irq(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_VERBOSE," failed.\n"); @@ -1841,8 +1890,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2242,8 +2291,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; @@ -2336,7 +2385,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; + ioapic_phys = mp_ioapics[i].mp_apicaddr; } else { ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index c0df7b89ca23..9d98cda39ad9 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -8,7 +8,6 @@ #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> #include <linux/cache.h> -#include <linux/interrupt.h> #include <linux/cpu.h> #include <linux/module.h> diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 147352df28b9..47a6f6f12478 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -48,6 +48,29 @@ void ack_bad_irq(unsigned int irq) #endif } +#ifdef CONFIG_DEBUG_STACKOVERFLOW +/* Debugging check for stack overflow: is there less than 1KB free? */ +static int check_stack_overflow(void) +{ + long sp; + + __asm__ __volatile__("andl %%esp,%0" : + "=r" (sp) : "0" (THREAD_SIZE - 1)); + + return sp < (sizeof(struct thread_info) + STACK_WARN); +} + +static void print_stack_overflow(void) +{ + printk(KERN_WARNING "low stack detected by irq handler\n"); + dump_stack(); +} + +#else +static inline int check_stack_overflow(void) { return 0; } +static inline void print_stack_overflow(void) { } +#endif + #ifdef CONFIG_4KSTACKS /* * per-CPU IRQ handling contexts (thread information and stack) @@ -59,48 +82,29 @@ union irq_ctx { static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -#endif -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -unsigned int do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - /* high bit used in ret_from_ code */ - int irq = ~regs->orig_ax; - struct irq_desc *desc = irq_desc + irq; -#ifdef CONFIG_4KSTACKS - union irq_ctx *curctx, *irqctx; - u32 *isp; -#endif +static char softirq_stack[NR_CPUS * THREAD_SIZE] + __attribute__((__section__(".bss.page_aligned"))); - if (unlikely((unsigned)irq >= NR_IRQS)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __func__, irq); - BUG(); - } +static char hardirq_stack[NR_CPUS * THREAD_SIZE] + __attribute__((__section__(".bss.page_aligned"))); - old_regs = set_irq_regs(regs); - irq_enter(); -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 1KB free? */ - { - long sp; - - __asm__ __volatile__("andl %%esp,%0" : - "=r" (sp) : "0" (THREAD_SIZE - 1)); - if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif +static void call_on_stack(void *func, void *stack) +{ + asm volatile("xchgl %%ebx,%%esp \n" + "call *%%edi \n" + "movl %%ebx,%%esp \n" + : "=b" (stack) + : "0" (stack), + "D"(func) + : "memory", "cc", "edx", "ecx", "eax"); +} -#ifdef CONFIG_4KSTACKS +static inline int +execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) +{ + union irq_ctx *curctx, *irqctx; + u32 *isp, arg1, arg2; curctx = (union irq_ctx *) current_thread_info(); irqctx = hardirq_ctx[smp_processor_id()]; @@ -111,52 +115,39 @@ unsigned int do_IRQ(struct pt_regs *regs) * handler) we can't do that and just have to keep using the * current stack (which is the irq stack already after all) */ - if (curctx != irqctx) { - int arg1, arg2, bx; - - /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; + if (unlikely(curctx == irqctx)) + return 0; - /* - * Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. - */ - irqctx->tinfo.preempt_count = - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call *%%edi \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (bx) - : "0" (irq), "1" (desc), "2" (isp), - "D" (desc->handle_irq) - : "memory", "cc", "ecx" - ); - } else -#endif - desc->handle_irq(irq, desc); + /* build the stack frame on the IRQ stack */ + isp = (u32 *) ((char*)irqctx + sizeof(*irqctx)); + irqctx->tinfo.task = curctx->tinfo.task; + irqctx->tinfo.previous_esp = current_stack_pointer; - irq_exit(); - set_irq_regs(old_regs); + /* + * Copy the softirq bits in preempt_count so that the + * softirq checks work in the hardirq context. + */ + irqctx->tinfo.preempt_count = + (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | + (curctx->tinfo.preempt_count & SOFTIRQ_MASK); + + if (unlikely(overflow)) + call_on_stack(print_stack_overflow, isp); + + asm volatile("xchgl %%ebx,%%esp \n" + "call *%%edi \n" + "movl %%ebx,%%esp \n" + : "=a" (arg1), "=d" (arg2), "=b" (isp) + : "0" (irq), "1" (desc), "2" (isp), + "D" (desc->handle_irq) + : "memory", "cc", "ecx"); return 1; } -#ifdef CONFIG_4KSTACKS - -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - /* * allocate per-cpu stacks for hardirq and for softirq processing */ -void irq_ctx_init(int cpu) +void __cpuinit irq_ctx_init(int cpu) { union irq_ctx *irqctx; @@ -164,25 +155,25 @@ void irq_ctx_init(int cpu) return; irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); hardirq_ctx[cpu] = irqctx; irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = 0; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = 0; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); softirq_ctx[cpu] = irqctx; - printk("CPU %u irqstacks, hard=%p soft=%p\n", - cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); + printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", + cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); } void irq_ctx_exit(int cpu) @@ -211,25 +202,56 @@ asmlinkage void do_softirq(void) /* build the stack frame on the softirq stack */ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_softirq \n" - " movl %%ebx,%%esp \n" - : "=b"(isp) - : "0"(isp) - : "memory", "cc", "edx", "ecx", "eax" - ); + call_on_stack(__do_softirq, isp); /* * Shouldnt happen, we returned above if in_interrupt(): - */ + */ WARN_ON_ONCE(softirq_count()); } local_irq_restore(flags); } + +#else +static inline int +execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } #endif /* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +unsigned int do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + /* high bit used in ret_from_ code */ + int overflow, irq = ~regs->orig_ax; + struct irq_desc *desc = irq_desc + irq; + + if (unlikely((unsigned)irq >= NR_IRQS)) { + printk(KERN_EMERG "%s: cannot handle IRQ %d\n", + __func__, irq); + BUG(); + } + + old_regs = set_irq_regs(regs); + irq_enter(); + + overflow = check_stack_overflow(); + + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) + print_stack_overflow(); + desc->handle_irq(irq, desc); + } + + irq_exit(); + set_irq_regs(old_regs); + return 1; +} + +/* * Interrupt statistics: */ @@ -313,16 +335,20 @@ skip: per_cpu(irq_stat,j).irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); +#endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); @@ -331,6 +357,40 @@ skip: return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = nmi_count(cpu); + +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += per_cpu(irq_stat, cpu).irq_resched_count; + sum += per_cpu(irq_stat, cpu).irq_call_count; + sum += per_cpu(irq_stat, cpu).irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += per_cpu(irq_stat, cpu).irq_thermal_count; +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU #include <mach_apic.h> diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 3aac15466a91..1f78b238d8d2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -135,6 +135,7 @@ skip: seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); @@ -143,6 +144,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); +#endif seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); @@ -153,6 +155,32 @@ skip: } /* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = cpu_pda(cpu)->__nmi_count; + + sum += cpu_pda(cpu)->apic_timer_irqs; +#ifdef CONFIG_SMP + sum += cpu_pda(cpu)->irq_resched_count; + sum += cpu_pda(cpu)->irq_call_count; + sum += cpu_pda(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += cpu_pda(cpu)->irq_thermal_count; + sum += cpu_pda(cpu)->irq_threshold_count; +#endif + sum += cpu_pda(cpu)->irq_spurious_count; + return sum; +} + +u64 arch_irq_stat(void) +{ + return atomic_read(&irq_err_count); +} + +/* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific * handlers). diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c new file mode 100644 index 000000000000..d66914287ee1 --- /dev/null +++ b/arch/x86/kernel/irqinit_32.c @@ -0,0 +1,114 @@ +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/kernel_stat.h> +#include <linux/sysdev.h> +#include <linux/bitops.h> + +#include <asm/atomic.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/timer.h> +#include <asm/pgtable.h> +#include <asm/delay.h> +#include <asm/desc.h> +#include <asm/apic.h> +#include <asm/arch_hooks.h> +#include <asm/i8259.h> + + + +/* + * Note that on a 486, we don't want to do a SIGFPE on an irq13 + * as the irq is unreliable, and exception 16 works correctly + * (ie as explained in the intel literature). On a 386, you + * can't use exception 16 due to bad IBM design, so we have to + * rely on the less exact irq13. + * + * Careful.. Not only is IRQ13 unreliable, but it is also + * leads to races. IBM designers who came up with it should + * be shot. + */ + + +static irqreturn_t math_error_irq(int cpl, void *dev_id) +{ + extern void math_error(void __user *); + outb(0,0xF0); + if (ignore_fpu_irq || !boot_cpu_data.hard_math) + return IRQ_NONE; + math_error((void __user *)get_irq_regs()->ip); + return IRQ_HANDLED; +} + +/* + * New motherboards sometimes make IRQ 13 be a PCI interrupt, + * so allow interrupt sharing. + */ +static struct irqaction fpu_irq = { + .handler = math_error_irq, + .mask = CPU_MASK_NONE, + .name = "fpu", +}; + +void __init init_ISA_irqs (void) +{ + int i; + +#ifdef CONFIG_X86_LOCAL_APIC + init_bsp_APIC(); +#endif + init_8259A(0); + + /* + * 16 old-style INTA-cycle interrupts: + */ + for (i = 0; i < 16; i++) { + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } +} + +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) +{ + int i; + + /* all the set up before the call gates are initialised */ + pre_intr_init_hook(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (i >= NR_IRQS) + break; + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (!test_bit(vector, used_vectors)) + set_intr_gate(vector, interrupt[i]); + } + + /* setup after call gates are initialised (usually add in + * the architecture specific gates) + */ + intr_init_hook(); + + /* + * External FPU? Set up irq13 if so, for + * original braindamaged IBM FERR coupling. + */ + if (boot_cpu_data.hard_math && !cpu_has_fpu) + setup_irq(FPU_IRQ, &fpu_irq); + + irq_ctx_init(smp_processor_id()); +} diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c new file mode 100644 index 000000000000..31f49e8f46a7 --- /dev/null +++ b/arch/x86/kernel/irqinit_64.c @@ -0,0 +1,217 @@ +#include <linux/linkage.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/timex.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/kernel_stat.h> +#include <linux/sysdev.h> +#include <linux/bitops.h> + +#include <asm/acpi.h> +#include <asm/atomic.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/hw_irq.h> +#include <asm/pgtable.h> +#include <asm/delay.h> +#include <asm/desc.h> +#include <asm/apic.h> +#include <asm/i8259.h> + +/* + * Common place to define all x86 IRQ vectors + * + * This builds up the IRQ handler stubs using some ugly macros in irq.h + * + * These macros create the low-level assembly IRQ routines that save + * register context and call do_IRQ(). do_IRQ() then does all the + * operations that are needed to keep the AT (or SMP IOAPIC) + * interrupt-controller happy. + */ + +#define IRQ_NAME2(nr) nr##_interrupt(void) +#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +/* + * SMP has a few special interrupts for IPI messages + */ + +#define BUILD_IRQ(nr) \ + asmlinkage void IRQ_NAME(nr); \ + asm("\n.p2align\n" \ + "IRQ" #nr "_interrupt:\n\t" \ + "push $~(" #nr ") ; " \ + "jmp common_interrupt"); + +#define BI(x,y) \ + BUILD_IRQ(x##y) + +#define BUILD_16_IRQS(x) \ + BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ + BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ + BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ + BI(x,c) BI(x,d) BI(x,e) BI(x,f) + +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. + * + * (these are usually mapped into the 0x30-0xff vector range) + */ + BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) +BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) +BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) +BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) + +#undef BUILD_16_IRQS +#undef BI + + +#define IRQ(x,y) \ + IRQ##x##y##_interrupt + +#define IRQLIST_16(x) \ + IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ + IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ + IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ + IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) + +/* for the irq vectors */ +static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { + IRQLIST_16(0x2), IRQLIST_16(0x3), + IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), + IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), + IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) +}; + +#undef IRQ +#undef IRQLIST_16 + + + + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ + +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, + [IRQ1_VECTOR] = 1, + [IRQ2_VECTOR] = 2, + [IRQ3_VECTOR] = 3, + [IRQ4_VECTOR] = 4, + [IRQ5_VECTOR] = 5, + [IRQ6_VECTOR] = 6, + [IRQ7_VECTOR] = 7, + [IRQ8_VECTOR] = 8, + [IRQ9_VECTOR] = 9, + [IRQ10_VECTOR] = 10, + [IRQ11_VECTOR] = 11, + [IRQ12_VECTOR] = 12, + [IRQ13_VECTOR] = 13, + [IRQ14_VECTOR] = 14, + [IRQ15_VECTOR] = 15, + [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 +}; + +static void __init init_ISA_irqs (void) +{ + int i; + + init_bsp_APIC(); + init_8259A(0); + + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = NULL; + irq_desc[i].depth = 1; + + if (i < 16) { + /* + * 16 old-style INTA-cycle interrupts: + */ + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } else { + /* + * 'high' PCI IRQs filled in on demand + */ + irq_desc[i].chip = &no_irq_chip; + } + } +} + +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) +{ + int i; + + init_ISA_irqs(); + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != IA32_SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } + +#ifdef CONFIG_SMP + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); +#endif + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); + + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + if (!acpi_ioapic) + setup_irq(2, &irq2); +} diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 0224c3637c73..21f2bae98c15 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -20,9 +20,9 @@ #include <asm/mmu_context.h> #ifdef CONFIG_SMP -static void flush_ldt(void *null) +static void flush_ldt(void *current_mm) { - if (current->active_mm) + if (current->active_mm == current_mm) load_LDT(¤t->active_mm->context); } #endif @@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); + smp_call_function(flush_ldt, current->mm, 1, 1); preempt_enable(); #else load_LDT(pc); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index d0b234c9fc31..f4960171bc66 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -39,7 +39,7 @@ static void set_idt(void *newidt, __u16 limit) curidt.address = (unsigned long)newidt; load_idt(&curidt); -}; +} static void set_gdt(void *newgdt, __u16 limit) @@ -51,7 +51,7 @@ static void set_gdt(void *newgdt, __u16 limit) curgdt.address = (unsigned long)newgdt; load_gdt(&curgdt); -}; +} static void load_segments(void) { diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 576a03db4511..7830dc4a8380 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -110,7 +110,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { pgd_t *level4p; level4p = (pgd_t *)__va(start_pgtable); - return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); + return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); } static void set_idt(void *newidt, u16 limit) diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 69729e38b78a..9758fea87c5b 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -5,13 +5,14 @@ * 2006 Shaohua Li <[email protected]> * * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, + * belonging to IA-32 family - PentiumPro, Pentium II, * Pentium III, Xeon, Pentium 4, etc. * - * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, - * Order Number 245472 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/245472.htm + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm * * For more information, go to http://www.urbanmyth.org/microcode * @@ -58,12 +59,12 @@ * nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian <[email protected]> * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble <[email protected]>, + * 1.12 29 Sep 2003 Nitin Kamble <[email protected]>, * Jun Nakajima <[email protected]> * Support for the microcode updates in the new format. * 1.13 10 Oct 2003 Tigran Aivazian <[email protected]> * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode + * because we no longer hold a copy of applied microcode * in kernel memory. * 1.14 25 Jun 2004 Tigran Aivazian <[email protected]> * Fix sigmatch() macro to handle old CPUs with pf == 0. @@ -320,11 +321,11 @@ static void apply_microcode(int cpu) return; /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); + spin_lock_irqsave(µcode_update_lock, flags); /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, + (unsigned long) uci->mc->bits, (unsigned long) uci->mc->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); @@ -341,7 +342,7 @@ static void apply_microcode(int cpu) return; } printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", + "0x%x to 0x%x, date = %08x \n", cpu_num, uci->rev, val[1], uci->mc->hdr.date); uci->rev = val[1]; } @@ -534,7 +535,7 @@ static int cpu_request_microcode(int cpu) c->x86, c->x86_model, c->x86_mask); error = request_firmware(&firmware, name, µcode_pdev->dev); if (error) { - pr_debug("microcode: ucode data file %s load failed\n", name); + pr_debug("microcode: data file %s load failed\n", name); return error; } buf = firmware->data; @@ -805,6 +806,9 @@ static int __init microcode_init (void) { int error; + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <[email protected]>\n"); + error = microcode_dev_init(); if (error) return error; @@ -825,9 +829,6 @@ static int __init microcode_init (void) } register_hotcpu_notifier(&mc_cpu_notifier); - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <[email protected]>\n"); return 0; } diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index edc5fbfe85c0..fdfdc550b366 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -12,6 +12,7 @@ #include <asm/io.h> #include <asm/msr.h> #include <asm/acpi.h> +#include <asm/mmconfig.h> #include "../pci/pci.h" diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 404683b94e79..8b6b1e05c306 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -25,6 +25,8 @@ #include <asm/proto.h> #include <asm/acpi.h> #include <asm/bios_ebda.h> +#include <asm/e820.h> +#include <asm/trampoline.h> #include <mach_apic.h> #ifdef CONFIG_X86_32 @@ -32,28 +34,6 @@ #include <mach_mpparse.h> #endif -/* Have we found an MP table */ -int smp_found_config; - -/* - * Various Linux-internal data structures created from the - * MP-table. - */ -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; - -static int mp_current_pci_id; - -int pic_mode; - -/* - * Intel MP BIOS table parsing routines: - */ - /* * Checksum an MP configuration block. */ @@ -69,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len) } #ifdef CONFIG_X86_NUMAQ +int found_numaq; /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... can't see a less disgusting way of * doing this .... */ +struct mpc_config_translation { + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; + +static inline int generate_logical_apicid(int quad, int phys_apicid) +{ + return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); +} + + +static inline int mpc_apic_id(struct mpc_config_processor *m, + struct mpc_config_translation *translation_record) +{ + int quad = translation_record->trans_quad; + int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); + + printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver, quad, logical_apicid); + return logical_apicid; +} + +int mp_bus_id_to_node[MAX_MP_BUSSES]; + +int mp_bus_id_to_local[MAX_MP_BUSSES]; + +static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, + struct mpc_config_translation *translation) +{ + int quad = translation->trans_quad; + int local = translation->trans_local; + + mp_bus_id_to_node[m->mpc_busid] = quad; + mp_bus_id_to_local[m->mpc_busid] = local; + printk(KERN_INFO "Bus #%d is %s (node %d)\n", + m->mpc_busid, name, quad); +} + +int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +static void mpc_oem_pci_bus(struct mpc_config_bus *m, + struct mpc_config_translation *translation) +{ + int quad = translation->trans_quad; + int local = translation->trans_local; + + quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; +} + #endif static void __cpuinit MP_processor_info(struct mpc_config_processor *m) @@ -90,7 +128,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) return; } #ifdef CONFIG_X86_NUMAQ - apicid = mpc_apic_id(m, translation_table[mpc_record]); + if (found_numaq) + apicid = mpc_apic_id(m, translation_table[mpc_record]); + else + apicid = m->mpc_apicid; #else apicid = m->mpc_apicid; #endif @@ -103,17 +144,18 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) generic_processor_info(apicid, m->mpc_apicver); } +#ifdef CONFIG_X86_IO_APIC static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; - memcpy(str, m->mpc_bustype, 6); str[6] = 0; #ifdef CONFIG_X86_NUMAQ - mpc_oem_bus_info(m, str, translation_table[mpc_record]); + if (found_numaq) + mpc_oem_bus_info(m, str, translation_table[mpc_record]); #else - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); #endif #if MAX_MP_BUSSES < 256 @@ -132,11 +174,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { #ifdef CONFIG_X86_NUMAQ - mpc_oem_pci_bus(m, translation_table[mpc_record]); + if (found_numaq) + mpc_oem_pci_bus(m, translation_table[mpc_record]); #endif clear_bit(m->mpc_busid, mp_bus_not_pci); - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { @@ -147,6 +188,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) } else printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } +#endif #ifdef CONFIG_X86_IO_APIC @@ -176,18 +218,89 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) if (bad_ioapic(m->mpc_apicaddr)) return; - mp_ioapics[nr_ioapics] = *m; + mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; + mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; + mp_ioapics[nr_ioapics].mp_type = m->mpc_type; + mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; + mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; nr_ioapics++; } -static void __init MP_intsrc_info(struct mpc_config_intsrc *m) +static void print_MP_intsrc_info(struct mpc_config_intsrc *m) { - mp_irqs[mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," + printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); +} + +static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +{ + printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, + (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, + mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); +} + +static void __init assign_to_mp_irq(struct mpc_config_intsrc *m, + struct mp_config_intsrc *mp_irq) +{ + mp_irq->mp_dstapic = m->mpc_dstapic; + mp_irq->mp_type = m->mpc_type; + mp_irq->mp_irqtype = m->mpc_irqtype; + mp_irq->mp_irqflag = m->mpc_irqflag; + mp_irq->mp_srcbus = m->mpc_srcbus; + mp_irq->mp_srcbusirq = m->mpc_srcbusirq; + mp_irq->mp_dstirq = m->mpc_dstirq; +} + +static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, + struct mpc_config_intsrc *m) +{ + m->mpc_dstapic = mp_irq->mp_dstapic; + m->mpc_type = mp_irq->mp_type; + m->mpc_irqtype = mp_irq->mp_irqtype; + m->mpc_irqflag = mp_irq->mp_irqflag; + m->mpc_srcbus = mp_irq->mp_srcbus; + m->mpc_srcbusirq = mp_irq->mp_srcbusirq; + m->mpc_dstirq = mp_irq->mp_dstirq; +} + +static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, + struct mpc_config_intsrc *m) +{ + if (mp_irq->mp_dstapic != m->mpc_dstapic) + return 1; + if (mp_irq->mp_type != m->mpc_type) + return 2; + if (mp_irq->mp_irqtype != m->mpc_irqtype) + return 3; + if (mp_irq->mp_irqflag != m->mpc_irqflag) + return 4; + if (mp_irq->mp_srcbus != m->mpc_srcbus) + return 5; + if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) + return 6; + if (mp_irq->mp_dstirq != m->mpc_dstirq) + return 7; + + return 0; +} + +static void __init MP_intsrc_info(struct mpc_config_intsrc *m) +{ + int i; + + print_MP_intsrc_info(m); + + for (i = 0; i < mp_irq_entries; i++) { + if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m)) + return; + } + + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } @@ -196,7 +309,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, @@ -266,11 +379,14 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, +void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) { if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! May not be a NUMA-Q system!\n"); + printk("Warning! Not a NUMA-Q system!\n"); + else + found_numaq = 1; + if (mpc->mpc_oemptr) smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, mpc->mpc_oemsize); @@ -281,12 +397,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, * Read/parse the MPC */ -static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) +static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, + char *str) { - char str[16]; - char oem[10]; - int count = sizeof(*mpc); - unsigned char *mpt = ((unsigned char *)mpc) + count; if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", @@ -309,19 +422,42 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) } memcpy(oem, mpc->mpc_oem, 8); oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); + printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); memcpy(str, mpc->mpc_productid, 12); str[12] = 0; - printk("Product ID: %s ", str); -#ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); -#endif - printk(KERN_INFO "MPTABLE: Product ID: %s ", str); + printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); + return 1; +} + +static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) +{ + char str[16]; + char oem[10]; + + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; + + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +#ifdef CONFIG_X86_32 + /* + * need to make sure summit and es7000's mps_oem_check is safe to be + * called early via genericarch 's mps_oem_check + */ + if (early) { +#ifdef CONFIG_X86_NUMAQ + numaq_mps_oem_check(mpc, oem, str); +#endif + } else + mps_oem_check(mpc, oem, str); +#endif + /* save the local APIC address, it might be non-default */ if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; @@ -352,7 +488,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) { struct mpc_config_bus *m = (struct mpc_config_bus *)mpt; +#ifdef CONFIG_X86_IO_APIC MP_bus_info(m); +#endif mpt += sizeof(*m); count += sizeof(*m); break; @@ -402,6 +540,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) ++mpc_record; #endif } + +#ifdef CONFIG_X86_GENERICARCH + generic_bigsmp_probe(); +#endif + setup_apic_routing(); if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); @@ -427,7 +570,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.mpc_type = MP_INTSRC; intsrc.mpc_irqflag = 0; /* conforming */ intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; intsrc.mpc_irqtype = mp_INT; @@ -488,40 +631,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) MP_intsrc_info(&intsrc); } -#endif -static inline void __init construct_default_ISA_mptable(int mpc_default_type) +static void construct_ioapic_table(int mpc_default_type) { - struct mpc_config_processor processor; - struct mpc_config_bus bus; -#ifdef CONFIG_X86_IO_APIC struct mpc_config_ioapic ioapic; -#endif - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } + struct mpc_config_bus bus; bus.mpc_type = MP_BUS; bus.mpc_busid = 0; @@ -550,7 +664,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) MP_bus_info(&bus); } -#ifdef CONFIG_X86_IO_APIC ioapic.mpc_type = MP_IOAPIC; ioapic.mpc_apicid = 2; ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; @@ -562,7 +675,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) * We set up most of the low 16 IO-APIC pins according to MPS rules. */ construct_default_ioirq_mptable(mpc_default_type); +} +#else +static inline void construct_ioapic_table(int mpc_default_type) { } #endif + +static inline void __init construct_default_ISA_mptable(int mpc_default_type) +{ + struct mpc_config_processor processor; + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + construct_ioapic_table(mpc_default_type); + lintsrc.mpc_type = MP_LINTSRC; lintsrc.mpc_irqflag = 0; /* conforming */ lintsrc.mpc_srcbusid = 0; @@ -600,7 +748,7 @@ static void __init __get_smp_config(unsigned early) printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) if (mpf->mpf_feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; @@ -632,7 +780,9 @@ static void __init __get_smp_config(unsigned early) * override the defaults. */ if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; +#endif printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. " @@ -689,7 +839,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); + printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { @@ -699,15 +849,21 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, !mpf_checksum((unsigned char *)bp, 16) && ((mpf->mpf_specification == 1) || (mpf->mpf_specification == 4))) { - +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; +#endif mpf_found = mpf; -#ifdef CONFIG_X86_32 + printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", mpf, virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, + + if (!reserve) + return 1; + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); if (mpf->mpf_physptr) { + unsigned long size = PAGE_SIZE; +#ifdef CONFIG_X86_32 /* * We cannot access to MPC table to compute * table size yet, as only few megabytes from @@ -717,24 +873,15 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * PAGE_SIZE from mpg->mpf_physptr yields BUG() * in reserve_bootmem. */ - unsigned long size = PAGE_SIZE; unsigned long end = max_low_pfn * PAGE_SIZE; if (mpf->mpf_physptr + size > end) size = end - mpf->mpf_physptr; - reserve_bootmem(mpf->mpf_physptr, size, +#endif + reserve_bootmem_generic(mpf->mpf_physptr, size, BOOTMEM_DEFAULT); } -#else - if (!reserve) - return 1; - - reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) - reserve_bootmem_generic(mpf->mpf_physptr, - PAGE_SIZE); -#endif - return 1; + return 1; } bp += 4; length -= 16; @@ -790,298 +937,294 @@ void __init find_smp_config(void) __find_smp_config(1); } -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ +#ifdef CONFIG_X86_IO_APIC +static u8 __initdata irq_used[MAX_IRQ_SOURCES]; -/* - * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: - */ -int es7000_plat; +static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) +{ + int i; -#ifdef CONFIG_ACPI + if (m->mpc_irqtype != mp_INT) + return 0; -#ifdef CONFIG_X86_IO_APIC + if (m->mpc_irqflag != 0x0f) + return 0; -#define MP_ISA_BUS 0 + /* not legacy */ -extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; + for (i = 0; i < mp_irq_entries; i++) { + if (mp_irqs[i].mp_irqtype != mp_INT) + continue; -static int mp_find_ioapic(int gsi) -{ - int i = 0; + if (mp_irqs[i].mp_irqflag != 0x0f) + continue; - /* Find the IOAPIC that manages this GSI. */ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; + if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) + continue; + if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) + continue; + if (irq_used[i]) { + /* already claimed */ + return -2; + } + irq_used[i] = 1; + return i; } - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + /* not found */ return -1; } -static u8 __init uniq_ioapic_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mpc_apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); +#define SPARE_SLOT_NUM 20 + +static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; #endif -} -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +static int __init replace_intsrc_all(struct mp_config_table *mpc, + unsigned long mpc_new_phys, + unsigned long mpc_new_length) { - int idx = 0; - - if (bad_ioapic(address)) - return; +#ifdef CONFIG_X86_IO_APIC + int i; + int nr_m_spare = 0; +#endif - idx = nr_ioapics; + int count = sizeof(*mpc); + unsigned char *mpt = ((unsigned char *)mpc) + count; - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; + printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); + while (count < mpc->mpc_length) { + switch (*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m = + (struct mpc_config_processor *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m = + (struct mpc_config_bus *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + mpt += sizeof(struct mpc_config_ioapic); + count += sizeof(struct mpc_config_ioapic); + break; + } + case MP_INTSRC: + { +#ifdef CONFIG_X86_IO_APIC + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); -#ifdef CONFIG_X86_32 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); -#else - mp_ioapics[idx].mpc_apicver = 0; + printk(KERN_INFO "OLD "); + print_MP_intsrc_info(m); + i = get_MP_intsrc_index(m); + if (i > 0) { + assign_to_mpc_intsrc(&mp_irqs[i], m); + printk(KERN_INFO "NEW "); + print_mp_irq_info(&mp_irqs[i]); + } else if (!i) { + /* legacy, do nothing */ + } else if (nr_m_spare < SPARE_SLOT_NUM) { + /* + * not found (-1), or duplicated (-2) + * are invalid entries, + * we need to use the slot later + */ + m_spare[nr_m_spare] = m; + nr_m_spare++; + } #endif - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). - */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - nr_ioapics++; -} + mpt += sizeof(struct mpc_config_intsrc); + count += sizeof(struct mpc_config_intsrc); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m = + (struct mpc_config_lintsrc *)mpt; + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + default: + /* wrong mptable */ + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); + printk(KERN_ERR "type %x\n", *mpt); + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, + 1, mpc, mpc->mpc_length, 1); + goto out; + } + } -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; +#ifdef CONFIG_X86_IO_APIC + for (i = 0; i < mp_irq_entries; i++) { + if (irq_used[i]) + continue; - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + if (mp_irqs[i].mp_irqtype != mp_INT) + continue; - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; + if (mp_irqs[i].mp_irqflag != 0x0f) + continue; - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ + if (nr_m_spare > 0) { + printk(KERN_INFO "*NEW* found "); + nr_m_spare--; + assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); + m_spare[nr_m_spare] = NULL; + } else { + struct mpc_config_intsrc *m = + (struct mpc_config_intsrc *)mpt; + count += sizeof(struct mpc_config_intsrc); + if (!mpc_new_phys) { + printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); + } else { + if (count <= mpc_new_length) + printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); + else { + printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); + goto out; + } + } + assign_to_mpc_intsrc(&mp_irqs[i], m); + mpc->mpc_length = count; + mpt += sizeof(struct mpc_config_intsrc); + } + print_mp_irq_info(&mp_irqs[i]); + } +#endif +out: + /* update checksum */ + mpc->mpc_checksum = 0; + mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, + mpc->mpc_length); - MP_intsrc_info(&intsrc); + return 0; } -void __init mp_config_acpi_legacy_irqs(void) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; +static int __initdata enable_update_mptable; -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) - /* - * Fabricate the legacy ISA bus (bus #31). - */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; -#endif - set_bit(MP_ISA_BUS, mp_bus_not_pci); - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); +static int __init update_mptable_setup(char *str) +{ + enable_update_mptable = 1; + return 0; +} +early_param("update_mptable", update_mptable_setup); - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; +static unsigned long __initdata mpc_new_phys; +static unsigned long mpc_new_length __initdata = 4096; - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; +/* alloc_mptable or alloc_mptable=4k */ +static int __initdata alloc_mptable; +static int __init parse_alloc_mptable_opt(char *p) +{ + enable_update_mptable = 1; + alloc_mptable = 1; + if (!p) + return 0; + mpc_new_length = memparse(p, &p); + return 0; +} +early_param("alloc_mptable", parse_alloc_mptable_opt); - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ - intsrc.mpc_srcbus = MP_ISA_BUS; -#ifdef CONFIG_X86_IO_APIC - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; +void __init early_reserve_e820_mpc_new(void) +{ + if (enable_update_mptable && alloc_mptable) { + u64 startt = 0; +#ifdef CONFIG_X86_TRAMPOLINE + startt = TRAMPOLINE_BASE; #endif - /* - * Use the default configuration for the IRQs 0-15. Unless - * overridden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int idx; - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS - && irq->mpc_srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ - intsrc.mpc_dstirq = i; - - MP_intsrc_info(&intsrc); + mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); } } -int mp_register_gsi(u32 gsi, int triggering, int polarity) +static int __init update_mp_table(void) { - int ioapic; - int ioapic_pin; -#ifdef CONFIG_X86_32 -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 + char str[16]; + char oem[10]; + struct intel_mp_floating *mpf; + struct mp_config_table *mpc; + struct mp_config_table *mpc_new; + + if (!enable_update_mptable) + return 0; + + mpf = mpf_found; + if (!mpf) + return 0; - static int pci_irq = IRQ_COMPRESSION_START; /* - * Mapping between Global System Interrupts, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. + * Now see if we need to go further. */ - static int gsi_to_irq[MAX_GSI_NUM]; -#else - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; -#endif + if (mpf->mpf_feature1 != 0) + return 0; - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; + if (!mpf->mpf_physptr) + return 0; - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } + mpc = phys_to_virt(mpf->mpf_physptr); - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + if (!smp_check_mpc(mpc, oem, str)) + return 0; -#ifdef CONFIG_X86_32 - if (ioapic_renumber_irq) - gsi = ioapic_renumber_irq(ioapic, gsi); -#endif + printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); + printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - return gsi; + if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { + mpc_new_phys = 0; + printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", + mpc_new_length); } - if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); -#ifdef CONFIG_X86_32 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); -#else - return gsi; -#endif + + if (!mpc_new_phys) { + unsigned char old, new; + /* check if we can change the postion */ + mpc->mpc_checksum = 0; + old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); + mpc->mpc_checksum = 0xff; + new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); + if (old == new) { + printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); + return 0; + } + printk(KERN_INFO "use in-positon replacing\n"); + } else { + mpf->mpf_physptr = mpc_new_phys; + mpc_new = phys_to_virt(mpc_new_phys); + memcpy(mpc_new, mpc, mpc->mpc_length); + mpc = mpc_new; + /* check if we can modify that */ + if (mpc_new_phys - mpf->mpf_physptr) { + struct intel_mp_floating *mpf_new; + /* steal 16 bytes from [0, 1k) */ + printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); + mpf_new = phys_to_virt(0x400 - 16); + memcpy(mpf_new, mpf, 16); + mpf = mpf_new; + mpf->mpf_physptr = mpc_new_phys; + } + mpf->mpf_checksum = 0; + mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); } - set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); -#ifdef CONFIG_X86_32 /* - * For GSI >= 64, use IRQ compression + * only replace the one with mp_INT and + * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW, + * already in mp_irqs , stored by ... and mp_config_acpi_gsi, + * may need pci=routeirq for all coverage */ - if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { - /* - * For PCI devices assign IRQs in order, avoiding gaps - * due to unused I/O APIC pins. - */ - int irq = gsi; - if (gsi < MAX_GSI_NUM) { - /* - * Retain the VIA chipset work-around (gsi > 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } -#endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; + replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); + + return 0; } -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ +late_initcall(update_mp_table); diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi.c index 84160f74eeb0..8dfe9db87a9e 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi.c @@ -11,10 +11,13 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include <asm/apic.h> + +#include <linux/nmi.h> +#include <linux/mm.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/module.h> -#include <linux/nmi.h> #include <linux/sysdev.h> #include <linux/sysctl.h> #include <linux/percpu.h> @@ -22,12 +25,18 @@ #include <linux/cpumask.h> #include <linux/kernel_stat.h> #include <linux/kdebug.h> -#include <linux/slab.h> +#include <linux/smp.h> +#include <asm/i8259.h> +#include <asm/io_apic.h> #include <asm/smp.h> #include <asm/nmi.h> +#include <asm/proto.h> +#include <asm/timer.h> + +#include <asm/mce.h> -#include "mach_traps.h" +#include <mach_traps.h> int unknown_nmi_panic; int nmi_watchdog_enabled; @@ -41,28 +50,65 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE; * 0: the lapic NMI watchdog is disabled, but can be enabled */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +EXPORT_SYMBOL(nmi_active); -unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; +unsigned int nmi_watchdog = NMI_NONE; +EXPORT_SYMBOL(nmi_watchdog); + +static int panic_on_timeout; +static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); +static int endflag __initdata; + +static inline unsigned int get_nmi_count(int cpu) +{ +#ifdef CONFIG_X86_64 + return cpu_pda(cpu)->__nmi_count; +#else + return nmi_count(cpu); +#endif +} + +static inline int mce_in_progress(void) +{ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) + return atomic_read(&mce_entry) > 0; +#endif + return 0; +} -static int endflag __initdata = 0; +/* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ +static inline unsigned int get_timer_irqs(int cpu) +{ +#ifdef CONFIG_X86_64 + return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); +#else + return per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; +#endif +} #ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when +/* + * The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all * CPUs during the test make them busy. */ static __init void nmi_cpu_busy(void *data) { local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ + /* + * Intentionally don't use cpu_relax here. This is + * to make sure that the performance counter really ticks, + * even if there is a simulator or similar that catches the + * pause instruction. On a real HT machine this is fine because + * all other CPUs are busy with "useless" delay loops and don't + * care if they get somewhat less cycles. + */ while (endflag == 0) mb(); } @@ -73,15 +119,12 @@ int __init check_nmi_watchdog(void) unsigned int *prev_nmi_count; int cpu; - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) - return 0; - - if (!atomic_read(&nmi_active)) + if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) return 0; - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); if (!prev_nmi_count) - return -1; + goto error; printk(KERN_INFO "Testing NMI watchdog ... "); @@ -91,25 +134,19 @@ int __init check_nmi_watchdog(void) #endif for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = nmi_count(cpu); + prev_nmi_count[cpu] = get_nmi_count(cpu); local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks + mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ - for_each_possible_cpu(cpu) { -#ifdef CONFIG_SMP - /* Check cpu_callin_map here because that is set - after the timer is started. */ - if (!cpu_isset(cpu, cpu_callin_map)) - continue; -#endif + for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], - nmi_count(cpu)); + get_nmi_count(cpu)); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } @@ -118,37 +155,50 @@ int __init check_nmi_watchdog(void) if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - return -1; + goto error; } printk("OK.\n"); - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ + /* + * now that we know it works we can reduce NMI frequency to + * something more reasonable; makes a difference in some configs + */ if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = lapic_adjust_nmi_hz(1); kfree(prev_nmi_count); return 0; +error: + if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) + disable_8259A_irq(0); + return -1; } static int __init setup_nmi_watchdog(char *str) { - int nmi; + unsigned int nmi; + + if (!strncmp(str, "panic", 5)) { + panic_on_timeout = 1; + str = strchr(str, ','); + if (!str) + return 1; + ++str; + } get_option(&str, &nmi); - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) + if (nmi >= NMI_INVALID) return 0; nmi_watchdog = nmi; return 1; } - __setup("nmi_watchdog=", setup_nmi_watchdog); - -/* Suspend/resume support */ - +/* + * Suspend/resume support + */ #ifdef CONFIG_PM static int nmi_pm_active; /* nmi_active before suspend */ @@ -172,7 +222,6 @@ static int lapic_nmi_resume(struct sys_device *dev) return 0; } - static struct sysdev_class nmi_sysclass = { .name = "lapic_nmi", .resume = lapic_nmi_resume, @@ -188,7 +237,8 @@ static int __init init_lapic_nmi_sysfs(void) { int error; - /* should really be a BUG_ON but b/c this is an + /* + * should really be a BUG_ON but b/c this is an * init call, it just doesn't work. -dcz */ if (nmi_watchdog != NMI_LOCAL_APIC) @@ -202,6 +252,7 @@ static int __init init_lapic_nmi_sysfs(void) error = sysdev_register(&device_lapic_nmi); return error; } + /* must come after the local APIC's device_initcall() */ late_initcall(init_lapic_nmi_sysfs); @@ -223,7 +274,7 @@ void acpi_nmi_enable(void) static void __acpi_nmi_disable(void *__unused) { - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); + apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); } /* @@ -242,12 +293,13 @@ void setup_apic_nmi_watchdog(void *unused) /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) + if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) return; switch (nmi_watchdog) { case NMI_LOCAL_APIC: - __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ + /* enable it before to avoid race with handler */ + __get_cpu_var(wd_enabled) = 1; if (lapic_watchdog_init(nmi_hz) < 0) { __get_cpu_var(wd_enabled) = 0; return; @@ -262,9 +314,8 @@ void setup_apic_nmi_watchdog(void *unused) void stop_apic_nmi_watchdog(void *unused) { /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; + if (!nmi_watchdog_active()) + return; if (__get_cpu_var(wd_enabled) == 0) return; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -284,26 +335,26 @@ void stop_apic_nmi_watchdog(void *unused) * since NMIs don't listen to _any_ locks, we have to be extremely * careful not to rely on unsafe variables. The printk might lock * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up - * here too!] + * [when there will be more tty-related locks, break them up here too!] */ -static unsigned int - last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; +static DEFINE_PER_CPU(unsigned, last_irq_sum); +static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog(void) { - if (nmi_watchdog > 0) { + if (nmi_watchdog_active()) { unsigned cpu; /* - * Just reset the alert counters, (other CPUs might be - * spinning on locks we hold): + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. */ for_each_present_cpu(cpu) { - if (alert_counter[cpu]) - alert_counter[cpu] = 0; + if (per_cpu(nmi_touch, cpu) != 1) + per_cpu(nmi_touch, cpu) = 1; } } @@ -314,12 +365,9 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -extern void die_nmi(struct pt_regs *, const char *msg); - notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { - /* * Since current_thread_info()-> is always on the stack, and we * always switch the stack NMI-atomically, it's safe to use @@ -337,39 +385,45 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) touched = 1; } + sum = get_timer_irqs(cpu); + + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } + if (cpu_isset(cpu, backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); + printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); dump_stack(); spin_unlock(&lock); cpu_clear(cpu, backtrace_mask); } - /* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; + /* Could check oops_in_progress here too, but it's safer not to */ + if (mce_in_progress()) + touched = 1; /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && last_irq_sums[cpu] == sum) { + if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ - alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) + local_inc(&__get_cpu_var(alert_counter)); + if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, panic_on_timeout); } else { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; + __get_cpu_var(last_irq_sum) = sum; + local_set(&__get_cpu_var(alert_counter), 0); } + /* see if the nmi watchdog went off */ if (!__get_cpu_var(wd_enabled)) return rc; @@ -378,7 +432,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) rc |= lapic_wd_event(nmi_hz); break; case NMI_IO_APIC: - /* don't know how to accurately check for this. + /* + * don't know how to accurately check for this. * just assume it was a watchdog timer interrupt * This matches the old behaviour. */ @@ -396,7 +451,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) char buf[64]; sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); + die_nmi(buf, regs, 1); /* Always panic here */ return 0; } @@ -414,32 +469,26 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, if (!!old_state == !!nmi_watchdog_enabled) return 0; - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); + if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { + printk(KERN_WARNING + "NMI watchdog is permanently disabled\n"); return -EIO; } - if (nmi_watchdog == NMI_DEFAULT) { - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; - } - if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) enable_lapic_nmi_watchdog(); else disable_lapic_nmi_watchdog(); } else { - printk( KERN_WARNING + printk(KERN_WARNING "NMI watchdog doesn't know what hardware to touch\n"); return -EIO; } return 0; } -#endif +#endif /* CONFIG_SYSCTL */ int do_nmi_callback(struct pt_regs *regs, int cpu) { @@ -462,6 +511,3 @@ void __trigger_all_cpu_backtrace(void) mdelay(1); } } - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c deleted file mode 100644 index 5a29ded994fa..000000000000 --- a/arch/x86/kernel/nmi_64.c +++ /dev/null @@ -1,482 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar <[email protected]> - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include <linux/nmi.h> -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/sysdev.h> -#include <linux/sysctl.h> -#include <linux/kprobes.h> -#include <linux/cpumask.h> -#include <linux/kdebug.h> - -#include <asm/smp.h> -#include <asm/nmi.h> -#include <asm/proto.h> -#include <asm/mce.h> - -#include <mach_traps.h> - -int unknown_nmi_panic; -int nmi_watchdog_enabled; -int panic_on_unrecovered_nmi; - -static cpumask_t backtrace_mask = CPU_MASK_NONE; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -static int panic_on_timeout; - -unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; - -static DEFINE_PER_CPU(short, wd_enabled); - -/* Run after command line and cpu_init init, but before all other checks */ -void nmi_watchdog_default(void) -{ - if (nmi_watchdog != NMI_DEFAULT) - return; - nmi_watchdog = NMI_NONE; -} - -static int endflag __initdata = 0; - -#ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ - while (endflag == 0) - mb(); -} -#endif - -int __init check_nmi_watchdog(void) -{ - int *prev_nmi_count; - int cpu; - - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) - return 0; - - if (!atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - return -1; - - printk(KERN_INFO "Testing NMI watchdog ... "); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); -#endif - - for (cpu = 0; cpu < NR_CPUS; cpu++) - prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; - local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks - - for_each_online_cpu(cpu) { - if (!per_cpu(wd_enabled, cpu)) - continue; - if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - cpu_pda(cpu)->__nmi_count); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - return -1; - } - printk("OK.\n"); - - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -} - -static int __init setup_nmi_watchdog(char *str) -{ - int nmi; - - if (!strncmp(str,"panic",5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - - get_option(&str, &nmi); - - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) - return 0; - - nmi_watchdog = nmi; - return 1; -} - -__setup("nmi_watchdog=", setup_nmi_watchdog); - -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - __get_cpu_var(wd_enabled) = 1; - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - */ - -static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(local_t, alert_counter); -static DEFINE_PER_CPU(int, nmi_touch); - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog > 0) { - unsigned cpu; - - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for_each_present_cpu(cpu) { - if (per_cpu(nmi_touch, cpu) != 1) - per_cpu(nmi_touch, cpu) = 1; - } - } - - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - /* check for other users first */ - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) { - rc = 1; - touched = 1; - } - - sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs); - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; - touched = 1; - } - - if (cpu_isset(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ - - spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); - dump_stack(); - spin_unlock(&lock); - cpu_clear(cpu, backtrace_mask); - } - -#ifdef CONFIG_X86_MCE - /* Could check oops_in_progress here too, but it's safer - not too */ - if (atomic_read(&mce_entry) > 0) - touched = 1; -#endif - /* if the apic timer isn't firing, this cpu isn't doing much */ - if (!touched && __get_cpu_var(last_irq_sum) == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) - die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs, - panic_on_timeout); - } else { - __get_cpu_var(last_irq_sum) = sum; - local_set(&__get_cpu_var(alert_counter), 0); - } - - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -static unsigned ignore_nmis; - -asmlinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - add_pda(__nmi_count,1); - if (!ignore_nmis) - default_do_nmi(regs); - nmi_exit(); -} - -void stop_nmi(void) -{ - acpi_nmi_disable(); - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; - acpi_nmi_enable(); -} - -#ifdef CONFIG_SYSCTL - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - /* if nmi_watchdog is not set yet, then set it */ - nmi_watchdog_default(); - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else { - printk( KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void __trigger_all_cpu_backtrace(void) -{ - int i; - - backtrace_mask = cpu_online_map; - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpus_empty(backtrace_mask)) - break; - mdelay(1); - } -} - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index e65281b1634b..f0f1de1c4a1d 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -31,6 +31,8 @@ #include <asm/numaq.h> #include <asm/topology.h> #include <asm/processor.h> +#include <asm/mpspec.h> +#include <asm/e820.h> #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -58,6 +60,8 @@ static void __init smp_dump_qct(void) node_end_pfn[node] = MB_TO_PAGES( eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); + e820_register_active_regions(node, node_start_pfn[node], + node_end_pfn[node]); memory_present(node, node_start_pfn[node], node_end_pfn[node]); node_remap_size[node] = node_memmap_size_bytes(node, @@ -67,13 +71,24 @@ static void __init smp_dump_qct(void) } } -/* - * Unlike Summit, we don't really care to let the NUMA-Q - * fall back to flat mode. Don't compile for NUMA-Q - * unless you really need it! - */ +static __init void early_check_numaq(void) +{ + /* + * Find possible boot-time SMP configuration: + */ + early_find_smp_config(); + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + early_get_smp_config(); +} + int __init get_memcfg_numaq(void) { + early_check_numaq(); + if (!found_numaq) + return 0; smp_dump_qct(); return 1; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 74f0c5ea2a03..e0f571d58c19 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -30,6 +30,7 @@ #include <asm/setup.h> #include <asm/arch_hooks.h> #include <asm/time.h> +#include <asm/pgalloc.h> #include <asm/irq.h> #include <asm/delay.h> #include <asm/fixmap.h> @@ -139,7 +140,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || + type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || + type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); else @@ -190,7 +193,9 @@ static void native_flush_tlb_single(unsigned long addr) /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_syscall_ret(void); +extern void native_irq_enable_sysexit(void); +extern void native_usergs_sysret32(void); +extern void native_usergs_sysret64(void); static int __init print_banner(void) { @@ -280,7 +285,7 @@ struct pv_time_ops pv_time_ops = { .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, .sched_clock = native_sched_clock, - .get_cpu_khz = native_calculate_cpu_khz, + .get_tsc_khz = native_calibrate_tsc, }; struct pv_irq_ops pv_irq_ops = { @@ -291,6 +296,9 @@ struct pv_irq_ops pv_irq_ops = { .irq_enable = native_irq_enable, .safe_halt = native_safe_halt, .halt = native_halt, +#ifdef CONFIG_X86_64 + .adjust_exception_frame = paravirt_nop, +#endif }; struct pv_cpu_ops pv_cpu_ops = { @@ -321,12 +329,23 @@ struct pv_cpu_ops pv_cpu_ops = { .store_idt = native_store_idt, .store_tr = native_store_tr, .load_tls = native_load_tls, +#ifdef CONFIG_X86_64 + .load_gs_index = native_load_gs_index, +#endif .write_ldt_entry = native_write_ldt_entry, .write_gdt_entry = native_write_gdt_entry, .write_idt_entry = native_write_idt_entry, .load_sp0 = native_load_sp0, - .irq_enable_syscall_ret = native_irq_enable_syscall_ret, +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + .irq_enable_sysexit = native_irq_enable_sysexit, +#endif +#ifdef CONFIG_X86_64 +#ifdef CONFIG_IA32_EMULATION + .usergs_sysret32 = native_usergs_sysret32, +#endif + .usergs_sysret64 = native_usergs_sysret64, +#endif .iret = native_iret, .swapgs = native_swapgs, @@ -366,6 +385,9 @@ struct pv_mmu_ops pv_mmu_ops = { .flush_tlb_single = native_flush_tlb_single, .flush_tlb_others = native_flush_tlb_others, + .pgd_alloc = __paravirt_pgd_alloc, + .pgd_free = paravirt_nop, + .alloc_pte = paravirt_nop, .alloc_pmd = paravirt_nop, .alloc_pmd_clone = paravirt_nop, @@ -380,6 +402,9 @@ struct pv_mmu_ops pv_mmu_ops = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, + #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = kmap_atomic, #endif @@ -403,6 +428,7 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, + .pte_flags = native_pte_val, .pgd_val = native_pgd_val, .make_pte = native_make_pte, @@ -416,6 +442,8 @@ struct pv_mmu_ops pv_mmu_ops = { .enter = paravirt_nop, .leave = paravirt_nop, }, + + .set_fixmap = native_set_fixmap, }; EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 82fc5fcab4f4..58262218781b 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit"); +DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret); + PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 7d904e138d7e..061d01df9ae6 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -14,8 +14,9 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); -/* the three commands give us more control to how to return from a syscall */ -DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;"); +DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit"); +DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); +DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); unsigned native_patch(u8 type, u16 clobbers, void *ibuf, @@ -35,7 +36,9 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret); + PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); + PATCH_SITE(pv_cpu_ops, usergs_sysret32); + PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e28ec497e142..6959b5c45df4 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1394,7 +1394,7 @@ void __init detect_calgary(void) return; } - specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); + specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { struct calgary_bus_info *info = &bus_info[bus]; @@ -1459,7 +1459,7 @@ int __init calgary_iommu_init(void) if (ret) { printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " "falling back to no_iommu\n", ret); - if (end_pfn > MAX_DMA32_PFN) + if (max_pfn > MAX_DMA32_PFN) printk(KERN_ERR "WARNING more than 4GB of memory, " "32bit PCI may malfunction.\n"); return ret; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 09c4dac11b5f..8467ec2320f1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -7,6 +7,7 @@ #include <asm/dma.h> #include <asm/gart.h> #include <asm/calgary.h> +#include <asm/amd_iommu.h> int forbid_dac __read_mostly; EXPORT_SYMBOL(forbid_dac); @@ -74,7 +75,7 @@ early_param("dma32_size", parse_dma32_size_opt); void __init dma32_reserve_bootmem(void) { unsigned long size, align; - if (end_pfn <= MAX_DMA32_PFN) + if (max_pfn <= MAX_DMA32_PFN) return; /* @@ -93,7 +94,7 @@ void __init dma32_reserve_bootmem(void) static void __init dma32_free_bootmem(void) { - if (end_pfn <= MAX_DMA32_PFN) + if (max_pfn <= MAX_DMA32_PFN) return; if (!dma32_bootmem_ptr) @@ -123,6 +124,8 @@ void __init pci_iommu_alloc(void) detect_intel_iommu(); + amd_iommu_detect(); + #ifdef CONFIG_SWIOTLB pci_swiotlb_init(); #endif @@ -358,7 +361,7 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); /* Allocate DMA memory on node near device */ -noinline struct page * +static noinline struct page * dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { int node; @@ -503,6 +506,8 @@ static int __init pci_iommu_init(void) intel_iommu_init(); + amd_iommu_init(); + #ifdef CONFIG_GART_IOMMU gart_iommu_init(); #endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 021f3c684a62..d0d18db5d2a4 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -751,10 +751,10 @@ void __init gart_iommu_init(void) return; if (no_iommu || - (!force_iommu && end_pfn <= MAX_DMA32_PFN) || + (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - if (end_pfn > MAX_DMA32_PFN) { + if (max_pfn > MAX_DMA32_PFN) { printk(KERN_WARNING "More than 4GB of memory " "but GART IOMMU not available.\n" KERN_WARNING "falling back to iommu=soft.\n"); diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 490da7f4b8d0..82299cd1d04d 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -38,7 +38,7 @@ const struct dma_mapping_ops swiotlb_dma_ops = { void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ - if (!iommu_detected && !no_iommu && end_pfn > MAX_DMA32_PFN) + if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; if (swiotlb_force) swiotlb = 1; diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c new file mode 100644 index 000000000000..675a48c404a5 --- /dev/null +++ b/arch/x86/kernel/probe_roms_32.c @@ -0,0 +1,166 @@ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <linux/mmzone.h> +#include <linux/ioport.h> +#include <linux/seq_file.h> +#include <linux/console.h> +#include <linux/init.h> +#include <linux/edd.h> +#include <linux/dmi.h> +#include <linux/pfn.h> +#include <linux/pci.h> +#include <asm/pci-direct.h> + + +#include <asm/e820.h> +#include <asm/mmzone.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <setup_arch.h> + +static struct resource system_rom_resource = { + .name = "System ROM", + .start = 0xf0000, + .end = 0xfffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource extension_rom_resource = { + .name = "Extension ROM", + .start = 0xe0000, + .end = 0xeffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource adapter_rom_resources[] = { { + .name = "Adapter ROM", + .start = 0xc8000, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +} }; + +static struct resource video_rom_resource = { + .name = "Video ROM", + .start = 0xc0000, + .end = 0xc7fff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +#define ROMSIGNATURE 0xaa55 + +static int __init romsignature(const unsigned char *rom) +{ + const unsigned short * const ptr = (const unsigned short *)rom; + unsigned short sig; + + return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; +} + +static int __init romchecksum(const unsigned char *rom, unsigned long length) +{ + unsigned char sum, c; + + for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) + sum += c; + return !length && !sum; +} + +void __init probe_roms(void) +{ + const unsigned char *rom; + unsigned long start, length, upper; + unsigned char c; + int i; + + /* video rom */ + upper = adapter_rom_resources[0].start; + for (start = video_rom_resource.start; start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + video_rom_resource.start = start; + + if (probe_kernel_address(rom + 2, c) != 0) + continue; + + /* 0 < length <= 0x7f * 512, historically */ + length = c * 512; + + /* if checksum okay, trust length byte */ + if (length && romchecksum(rom, length)) + video_rom_resource.end = start + length - 1; + + request_resource(&iomem_resource, &video_rom_resource); + break; + } + + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; + if (start < upper) + start = upper; + + /* system rom */ + request_resource(&iomem_resource, &system_rom_resource); + upper = system_rom_resource.start; + + /* check for extension rom (ignore length byte!) */ + rom = isa_bus_to_virt(extension_rom_resource.start); + if (romsignature(rom)) { + length = extension_rom_resource.end - extension_rom_resource.start + 1; + if (romchecksum(rom, length)) { + request_resource(&iomem_resource, &extension_rom_resource); + upper = extension_rom_resource.start; + } + } + + /* check for adapter roms on 2k boundaries */ + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + if (probe_kernel_address(rom + 2, c) != 0) + continue; + + /* 0 < length <= 0x7f * 512, historically */ + length = c * 512; + + /* but accept any length that fits if checksum okay */ + if (!length || start + length > upper || !romchecksum(rom, length)) + continue; + + adapter_rom_resources[i].start = start; + adapter_rom_resources[i].end = start + length - 1; + request_resource(&iomem_resource, &adapter_rom_resources[i]); + + start = adapter_rom_resources[i++].end & ~2047UL; + } +} + diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba370dc8685b..4061d63aabe7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/pm.h> +#include <linux/clockchips.h> struct kmem_cache *task_xstate_cachep; @@ -45,6 +46,76 @@ void arch_task_cache_init(void) SLAB_PANIC, NULL); } +/* + * Idle related variables and functions + */ +unsigned long boot_option_idle_override = 0; +EXPORT_SYMBOL(boot_option_idle_override); + +/* + * Powermanagement idle function, if any.. + */ +void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); + +#ifdef CONFIG_X86_32 +/* + * This halt magic was a workaround for ancient floppy DMA + * wreckage. It should be safe to remove. + */ +static int hlt_counter; +void disable_hlt(void) +{ + hlt_counter++; +} +EXPORT_SYMBOL(disable_hlt); + +void enable_hlt(void) +{ + hlt_counter--; +} +EXPORT_SYMBOL(enable_hlt); + +static inline int hlt_use_halt(void) +{ + return (!hlt_counter && boot_cpu_data.hlt_works_ok); +} +#else +static inline int hlt_use_halt(void) +{ + return 1; +} +#endif + +/* + * We use this if we don't have any better + * idle routine.. + */ +void default_idle(void) +{ + if (hlt_use_halt()) { + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); + + if (!need_resched()) + safe_halt(); /* enables interrupts racelessly */ + else + local_irq_enable(); + current_thread_info()->status |= TS_POLLING; + } else { + local_irq_enable(); + /* loop is done by the caller */ + cpu_relax(); + } +} +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(default_idle); +#endif + static void do_nothing(void *unused) { } @@ -122,44 +193,129 @@ static void poll_idle(void) * * idle=mwait overrides this decision and forces the usage of mwait. */ + +#define MWAIT_INFO 0x05 +#define MWAIT_ECX_EXTENDED_INFO 0x01 +#define MWAIT_EDX_C1 0xf0 + static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) { + u32 eax, ebx, ecx, edx; + if (force_mwait) return 1; - if (c->x86_vendor == X86_VENDOR_AMD) { - switch(c->x86) { - case 0x10: - case 0x11: - return 0; - } - } + if (c->cpuid_level < MWAIT_INFO) + return 0; + + cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); + /* Check, whether EDX has extended info about MWAIT */ + if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) + return 1; + + /* + * edx enumeratios MONITOR/MWAIT extensions. Check, whether + * C1 supports MWAIT + */ + return (edx & MWAIT_EDX_C1); +} + +/* + * Check for AMD CPUs, which have potentially C1E support + */ +static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) +{ + if (c->x86_vendor != X86_VENDOR_AMD) + return 0; + + if (c->x86 < 0x0F) + return 0; + + /* Family 0x0f models < rev F do not have C1E */ + if (c->x86 == 0x0f && c->x86_model < 0x40) + return 0; + return 1; } -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +/* + * C1E aware idle routine. We check for C1E active in the interrupt + * pending message MSR. If we detect C1E, then we handle it the same + * way as C3 power states (local apic timer and TSC stop) + */ +static void c1e_idle(void) { - static int selected; + static cpumask_t c1e_mask = CPU_MASK_NONE; + static int c1e_detected; - if (selected) + if (need_resched()) return; + + if (!c1e_detected) { + u32 lo, hi; + + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (lo & K8_INTP_C1E_ACTIVE_MASK) { + c1e_detected = 1; + mark_tsc_unstable("TSC halt in C1E"); + printk(KERN_INFO "System has C1E enabled\n"); + } + } + + if (c1e_detected) { + int cpu = smp_processor_id(); + + if (!cpu_isset(cpu, c1e_mask)) { + cpu_set(cpu, c1e_mask); + /* + * Force broadcast so ACPI can not interfere. Needs + * to run with interrupts enabled as it uses + * smp_function_call. + */ + local_irq_enable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, + &cpu); + printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", + cpu); + local_irq_disable(); + } + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + + default_idle(); + + /* + * The switch back from broadcast mode needs to be + * called with interrupts disabled. + */ + local_irq_disable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + local_irq_enable(); + } else + default_idle(); +} + +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ #ifdef CONFIG_X86_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { printk(KERN_WARNING "WARNING: polling idle and HT enabled," " performance may degrade.\n"); } #endif + if (pm_idle) + return; + if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { /* - * Skip, if setup has overridden idle. * One CPU supports mwait => All CPUs supports mwait */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; + printk(KERN_INFO "using mwait in idle threads.\n"); + pm_idle = mwait_idle; + } else if (check_c1e_idle(c)) { + printk(KERN_INFO "using C1E aware idle routine\n"); + pm_idle = c1e_idle; + } else + pm_idle = default_idle; } static int __init idle_setup(char *str) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e2db9ac5c61c..9a139f6c9df3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -58,11 +58,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -static int hlt_counter; - -unsigned long boot_option_idle_override = 0; -EXPORT_SYMBOL(boot_option_idle_override); - DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); @@ -77,57 +72,24 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); -EXPORT_SYMBOL(pm_idle); +#ifdef CONFIG_HOTPLUG_CPU +#include <asm/nmi.h> -void disable_hlt(void) +static void cpu_exit_clear(void) { - hlt_counter++; -} + int cpu = raw_smp_processor_id(); -EXPORT_SYMBOL(disable_hlt); + idle_task_exit(); -void enable_hlt(void) -{ - hlt_counter--; -} + cpu_uninit(); + irq_ctx_exit(cpu); -EXPORT_SYMBOL(enable_hlt); - -/* - * We use this if we don't have any better - * idle routine.. - */ -void default_idle(void) -{ - if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); - if (!need_resched()) - safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; - } else { - local_irq_enable(); - /* loop is done by the caller */ - cpu_relax(); - } + numa_remove_cpu(cpu); } -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(default_idle); -#endif -#ifdef CONFIG_HOTPLUG_CPU -#include <asm/nmi.h> /* We don't actually take CPU down, just spin without interrupts. */ static inline void play_dead(void) { @@ -168,24 +130,19 @@ void cpu_idle(void) while (1) { tick_nohz_stop_sched_tick(); while (!need_resched()) { - void (*idle)(void); check_pgt_cache(); rmb(); - idle = pm_idle; if (rcu_pending(cpu)) rcu_check_callbacks(cpu, 0); - if (!idle) - idle = default_idle; - if (cpu_is_offline(cpu)) play_dead(); local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; - idle(); + pm_idle(); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c6eb5c91e5f6..db5eb963e4df 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -56,15 +56,6 @@ asmlinkage extern void ret_from_fork(void); unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; -unsigned long boot_option_idle_override = 0; -EXPORT_SYMBOL(boot_option_idle_override); - -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); -EXPORT_SYMBOL(pm_idle); - static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) @@ -94,25 +85,6 @@ void exit_idle(void) __exit_idle(); } -/* - * We use this if we don't have any better - * idle routine.. - */ -void default_idle(void) -{ - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - if (!need_resched()) - safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; -} - #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); @@ -150,12 +122,9 @@ void cpu_idle(void) while (1) { tick_nohz_stop_sched_tick(); while (!need_resched()) { - void (*idle)(void); rmb(); - idle = pm_idle; - if (!idle) - idle = default_idle; + if (cpu_is_offline(smp_processor_id())) play_dead(); /* @@ -165,7 +134,7 @@ void cpu_idle(void) */ local_irq_disable(); enter_idle(); - idle(); + pm_idle(); /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ @@ -366,10 +335,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; - asm("mov %%gs,%0" : "=m" (p->thread.gsindex)); - asm("mov %%fs,%0" : "=m" (p->thread.fsindex)); - asm("mov %%es,%0" : "=m" (p->thread.es)); - asm("mov %%ds,%0" : "=m" (p->thread.ds)); + savesegment(gs, p->thread.gsindex); + savesegment(fs, p->thread.fsindex); + savesegment(es, p->thread.es); + savesegment(ds, p->thread.ds); if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -408,7 +377,9 @@ out: void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { - asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0)); + loadsegment(fs, 0); + loadsegment(es, 0); + loadsegment(ds, 0); load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; @@ -567,6 +538,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) *next = &next_p->thread; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + unsigned fsindex, gsindex; /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter>5) @@ -581,22 +553,38 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch DS and ES. * This won't pick up thread selector changes, but I guess that is ok. */ - asm volatile("mov %%es,%0" : "=m" (prev->es)); + savesegment(es, prev->es); if (unlikely(next->es | prev->es)) loadsegment(es, next->es); - - asm volatile ("mov %%ds,%0" : "=m" (prev->ds)); + + savesegment(ds, prev->ds); if (unlikely(next->ds | prev->ds)) loadsegment(ds, next->ds); + + /* We must save %fs and %gs before load_TLS() because + * %fs and %gs may be cleared by load_TLS(). + * + * (e.g. xen_load_tls()) + */ + savesegment(fs, fsindex); + savesegment(gs, gsindex); + load_TLS(next, cpu); + /* + * Leave lazy mode, flushing any hypercalls made here. + * This must be done before restoring TLS segments so + * the GDT and LDT are properly updated, and must be + * done before math_state_restore, so the TS bit is up + * to date. + */ + arch_leave_lazy_cpu_mode(); + /* * Switch FS and GS. */ { - unsigned fsindex; - asm volatile("movl %%fs,%0" : "=r" (fsindex)); /* segment register != 0 always requires a reload. also reload when it has changed. when prev process used 64bit base always reload @@ -614,10 +602,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (next->fs) wrmsrl(MSR_FS_BASE, next->fs); prev->fsindex = fsindex; - } - { - unsigned gsindex; - asm volatile("movl %%gs,%0" : "=r" (gsindex)); + if (unlikely(gsindex | next->gsindex | prev->gs)) { load_gs_index(next->gsindex); if (gsindex) @@ -798,7 +783,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) set_32bit_tls(task, FS_TLS, addr); if (doit) { load_TLS(&task->thread, cpu); - asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); + loadsegment(fs, FS_TLS_SEL); } task->thread.fsindex = FS_TLS_SEL; task->thread.fs = 0; @@ -808,7 +793,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (doit) { /* set the selector to 0 to not confuse __switch_to */ - asm volatile("movl %0,%%fs" :: "r" (0)); + loadsegment(fs, 0); ret = checking_wrmsrl(MSR_FS_BASE, addr); } } @@ -831,7 +816,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (task->thread.gsindex == GS_TLS_SEL) base = read_32bit_tls(task, GS_TLS); else if (doit) { - asm("movl %%gs,%0" : "=r" (gsindex)); + savesegment(gs, gsindex); if (gsindex) rdmsrl(MSR_KERNEL_GS_BASE, base); else diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a7835f282936..77040b6070e1 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -943,13 +943,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return copy_regset_to_user(child, &user_x86_32_view, REGSET_XFP, 0, sizeof(struct user_fxsr_struct), - datap); + datap) ? -EIO : 0; case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ return copy_regset_from_user(child, &user_x86_32_view, REGSET_XFP, 0, sizeof(struct user_fxsr_struct), - datap); + datap) ? -EIO : 0; #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d89a648fe710..79bdcd11c66e 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -65,6 +65,7 @@ static enum { ICH_FORCE_HPET_RESUME, VT8237_FORCE_HPET_RESUME, NVIDIA_FORCE_HPET_RESUME, + ATI_FORCE_HPET_RESUME, } force_hpet_resume_type; static void __iomem *rcba_base; @@ -158,6 +159,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, @@ -174,6 +177,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, static struct pci_dev *cached_dev; +static void hpet_print_force_info(void) +{ + printk(KERN_INFO "HPET not enabled in BIOS. " + "You might try hpet=force boot option\n"); +} + static void old_ich_force_hpet_resume(void) { u32 val; @@ -253,6 +262,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) { if (hpet_force_user) old_ich_force_enable_hpet(dev); + else + hpet_print_force_info(); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, @@ -290,8 +301,13 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev) { u32 uninitialized_var(val); - if (!hpet_force_user || hpet_address || force_hpet_address) + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); return; + } pci_read_config_dword(dev, 0x68, &val); /* @@ -330,6 +346,36 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, vt8237_force_enable_hpet); +static void ati_force_hpet_resume(void) +{ + pci_write_config_dword(cached_dev, 0x14, 0xfed00000); + printk(KERN_DEBUG "Force enabled HPET at resume\n"); +} + +static void ati_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); + return; + } + + pci_write_config_dword(dev, 0x14, 0xfed00000); + pci_read_config_dword(dev, 0x14, &val); + force_hpet_address = val; + force_hpet_resume_type = ATI_FORCE_HPET_RESUME; + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", + force_hpet_address); + cached_dev = dev; + return; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, + ati_force_enable_hpet); + /* * Undocumented chipset feature taken from LinuxBIOS. */ @@ -343,8 +389,13 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev) { u32 uninitialized_var(val); - if (!hpet_force_user || hpet_address || force_hpet_address) + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); return; + } pci_write_config_dword(dev, 0x44, 0xfed00001); pci_read_config_dword(dev, 0x44, &val); @@ -397,6 +448,9 @@ void force_hpet_resume(void) case NVIDIA_FORCE_HPET_RESUME: nvidia_force_hpet_resume(); return; + case ATI_FORCE_HPET_RESUME: + ati_force_hpet_resume(); + return; default: break; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f6be7d5f82f8..f8a62160e151 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -27,7 +27,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static long no_idt[3]; +static const struct desc_ptr no_idt = {}; static int reboot_mode; enum reboot_type reboot_type = BOOT_KBD; int reboot_force; @@ -201,15 +201,15 @@ core_initcall(reboot_init); controller to pulse the CPU reset line, which is more thorough, but doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct desc_ptr +static const struct desc_ptr real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, real_mode_idt = { 0x3ff, 0 }; @@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 }; More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. */ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] = 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] = * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, int length) { local_irq_disable(); @@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void) } case BOOT_TRIPLE: - load_idt((const struct desc_ptr *)&no_idt); + load_idt(&no_idt); __asm__ __volatile__("int3"); reboot_type = BOOT_KBD; diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index dec0b5ec25c2..61a837743fe5 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -49,7 +49,7 @@ struct device_fixup { void (*reboot_fixup)(struct pci_dev *); }; -static struct device_fixup fixups_table[] = { +static const struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, @@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = { */ void mach_reboot_fixups(void) { - struct device_fixup *cur; + const struct device_fixup *cur; struct pci_dev *dev; int i; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6f80b852a196..a7c3471ea17c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1,139 +1,866 @@ -#include <linux/kernel.h> +/* + * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * + * Memory region support + * David Parsons <[email protected]>, July-August 1999 + * + * Added E820 sanitization routine (removes overlapping memory regions); + * Brian Moyle <[email protected]>, February 2001 + * + * Moved CPU detection code to cpu/${cpu}.c + * Patrick Mochel <[email protected]>, March 2002 + * + * Provisions for empty E820 memory regions (reported by certain BIOSes). + * Alex Achenbach <[email protected]>, December 2002. + * + */ + +/* + * This file handles the architecture-dependent parts of initialization + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/screen_info.h> +#include <linux/ioport.h> +#include <linux/acpi.h> +#include <linux/apm_bios.h> +#include <linux/initrd.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/console.h> +#include <linux/mca.h> +#include <linux/root_dev.h> +#include <linux/highmem.h> #include <linux/module.h> +#include <linux/efi.h> #include <linux/init.h> -#include <linux/bootmem.h> +#include <linux/edd.h> +#include <linux/iscsi_ibft.h> +#include <linux/nodemask.h> +#include <linux/kexec.h> +#include <linux/dmi.h> +#include <linux/pfn.h> +#include <linux/pci.h> +#include <asm/pci-direct.h> +#include <linux/init_ohci1394_dma.h> +#include <linux/kvm_para.h> + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/delay.h> +#include <linux/highmem.h> + +#include <linux/kallsyms.h> +#include <linux/edd.h> +#include <linux/iscsi_ibft.h> +#include <linux/kexec.h> +#include <linux/cpufreq.h> +#include <linux/dma-mapping.h> +#include <linux/ctype.h> +#include <linux/uaccess.h> + #include <linux/percpu.h> +#include <linux/crash_dump.h> + +#include <video/edid.h> + +#include <asm/mtrr.h> +#include <asm/apic.h> +#include <asm/e820.h> +#include <asm/mpspec.h> +#include <asm/setup.h> +#include <asm/arch_hooks.h> +#include <asm/efi.h> +#include <asm/sections.h> +#include <asm/dmi.h> +#include <asm/io_apic.h> +#include <asm/ist.h> +#include <asm/vmi.h> +#include <setup_arch.h> +#include <asm/bios_ebda.h> +#include <asm/cacheflush.h> +#include <asm/processor.h> +#include <asm/bugs.h> + +#include <asm/system.h> +#include <asm/vsyscall.h> #include <asm/smp.h> +#include <asm/desc.h> +#include <asm/dma.h> +#include <asm/gart.h> +#include <asm/mmu_context.h> +#include <asm/proto.h> + +#include <mach_apic.h> +#include <asm/paravirt.h> + #include <asm/percpu.h> #include <asm/sections.h> -#include <asm/processor.h> -#include <asm/setup.h> #include <asm/topology.h> -#include <asm/mpspec.h> #include <asm/apicdef.h> +#ifdef CONFIG_X86_64 +#include <asm/numa_64.h> +#endif -#ifdef CONFIG_X86_LOCAL_APIC -unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); +#ifndef ARCH_SETUP +#define ARCH_SETUP +#endif -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); +#ifndef CONFIG_DEBUG_BOOT_PARAMS +struct boot_params __initdata boot_params; +#else +struct boot_params boot_params; +#endif -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; +/* + * Machine setup.. + */ +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + + +#ifdef CONFIG_X86_32 +/* This value is set up by the early boot code to point to the value + immediately after the boot time page tables. It contains a *physical* + address, and must not be in the .bss segment! */ +unsigned long init_pg_tables_start __initdata = ~0UL; +unsigned long init_pg_tables_end __initdata = ~0UL; + +static struct resource video_ram_resource = { + .name = "Video RAM area", + .start = 0xa0000, + .end = 0xbffff, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +/* cpu data as detected by the assembly code in head.S */ +struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +/* common cpu data for all cpus */ +struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +EXPORT_SYMBOL(boot_cpu_data); +static void set_mca_bus(int x) +{ +#ifdef CONFIG_MCA + MCA_bus = x; +#endif +} + +unsigned int def_to_bigsmp; + +/* for MCA, but anyone else can use it if they want */ +unsigned int machine_id; +unsigned int machine_submodel_id; +unsigned int BIOS_revision; + +struct apm_info apm_info; +EXPORT_SYMBOL(apm_info); + +#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ + defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) +struct ist_info ist_info; +EXPORT_SYMBOL(ist_info); +#else +struct ist_info ist_info; #endif -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) +#else +struct cpuinfo_x86 boot_cpu_data __read_mostly; +EXPORT_SYMBOL(boot_cpu_data); +#endif + + +#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) +unsigned long mmu_cr4_features; +#else +unsigned long mmu_cr4_features = X86_CR4_PAE; +#endif + +/* Boot loader ID as an integer, for the benefit of proc_dointvec */ +int bootloader_type; + /* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. + * Early DMI memory */ -static void __init setup_per_cpu_maps(void) +int dmi_alloc_index; +char dmi_alloc_data[DMI_MAX_DATA]; + +/* + * Setup options + */ +struct screen_info screen_info; +EXPORT_SYMBOL(screen_info); +struct edid_info edid_info; +EXPORT_SYMBOL_GPL(edid_info); + +extern int root_mountflags; + +unsigned long saved_video_mode; + +#define RAMDISK_IMAGE_START_MASK 0x07FF +#define RAMDISK_PROMPT_FLAG 0x8000 +#define RAMDISK_LOAD_FLAG 0x4000 + +static char __initdata command_line[COMMAND_LINE_SIZE]; + +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) +struct edd edd; +#ifdef CONFIG_EDD_MODULE +EXPORT_SYMBOL(edd); +#endif +/** + * copy_edd() - Copy the BIOS EDD information + * from boot_params into a safe place. + * + */ +static inline void copy_edd(void) +{ + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; +} +#else +static inline void copy_edd(void) +{ +} +#endif + +#ifdef CONFIG_BLK_DEV_INITRD + +#ifdef CONFIG_X86_32 + +#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) +static void __init relocate_initrd(void) { - int cpu; - for_each_possible_cpu(cpu) { - per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; - per_cpu(x86_bios_cpu_apicid, cpu) = - x86_bios_cpu_apicid_init[cpu]; -#ifdef CONFIG_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - x86_cpu_to_node_map_init[cpu]; + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 ramdisk_here; + unsigned long slop, clen, mapaddr; + char *p, *q; + + /* We need to move the initrd down into lowmem */ + ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, + PAGE_SIZE); + + if (ramdisk_here == -1ULL) + panic("Cannot find place for new RAMDISK of size %lld\n", + ramdisk_size); + + /* Note: this includes all the lowmem currently occupied by + the initrd, we rely on that fact to keep the data intact. */ + reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, + "NEW RAMDISK"); + initrd_start = ramdisk_here + PAGE_OFFSET; + initrd_end = initrd_start + ramdisk_size; + printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", + ramdisk_here, ramdisk_here + ramdisk_size); + + q = (char *)initrd_start; + + /* Copy any lowmem portion of the initrd */ + if (ramdisk_image < end_of_lowmem) { + clen = end_of_lowmem - ramdisk_image; + p = (char *)__va(ramdisk_image); + memcpy(q, p, clen); + q += clen; + ramdisk_image += clen; + ramdisk_size -= clen; + } + + /* Copy the highmem portion of the initrd */ + while (ramdisk_size) { + slop = ramdisk_image & ~PAGE_MASK; + clen = ramdisk_size; + if (clen > MAX_MAP_CHUNK-slop) + clen = MAX_MAP_CHUNK-slop; + mapaddr = ramdisk_image & PAGE_MASK; + p = early_ioremap(mapaddr, clen+slop); + memcpy(q, p+slop, clen); + early_iounmap(p, clen+slop); + q += clen; + ramdisk_image += clen; + ramdisk_size -= clen; + } + /* high pages is not converted by early_res_to_bootmem */ + ramdisk_image = boot_params.hdr.ramdisk_image; + ramdisk_size = boot_params.hdr.ramdisk_size; + printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to" + " %08llx - %08llx\n", + ramdisk_image, ramdisk_image + ramdisk_size - 1, + ramdisk_here, ramdisk_here + ramdisk_size - 1); +} #endif + +static void __init reserve_initrd(void) +{ + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + + if (!boot_params.hdr.type_of_loader || + !ramdisk_image || !ramdisk_size) + return; /* No initrd provided by bootloader */ + + initrd_start = 0; + + if (ramdisk_size >= (end_of_lowmem>>1)) { + free_early(ramdisk_image, ramdisk_end); + printk(KERN_ERR "initrd too large to handle, " + "disabling initrd\n"); + return; + } + + printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, + ramdisk_end); + + + if (ramdisk_end <= end_of_lowmem) { + /* All in lowmem, easy case */ + /* + * don't need to reserve again, already reserved early + * in i386_start_kernel + */ + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start + ramdisk_size; + return; } - /* indicate the early static arrays will soon be gone */ - x86_cpu_to_apicid_early_ptr = NULL; - x86_bios_cpu_apicid_early_ptr = NULL; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = NULL; +#ifdef CONFIG_X86_32 + relocate_initrd(); +#else + printk(KERN_ERR "initrd extends beyond end of memory " + "(0x%08llx > 0x%08llx)\ndisabling initrd\n", + ramdisk_end, end_of_lowmem); + initrd_start = 0; #endif + free_early(ramdisk_image, ramdisk_end); +} +#else +static void __init reserve_initrd(void) +{ } +#endif /* CONFIG_BLK_DEV_INITRD */ + +static void __init parse_setup_data(void) +{ + struct setup_data *data; + u64 pa_data; -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -cpumask_t *cpumask_of_cpu_map __read_mostly; -EXPORT_SYMBOL(cpumask_of_cpu_map); + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, PAGE_SIZE); + switch (data->type) { + case SETUP_E820_EXT: + parse_e820_ext(data, pa_data); + break; + default: + break; + } + pa_data = data->next; + early_iounmap(data, PAGE_SIZE); + } +} -/* requires nr_cpu_ids to be initialized */ -static void __init setup_cpumask_of_cpu(void) +static void __init e820_reserve_setup_data(void) { - int i; + struct setup_data *data; + u64 pa_data; + int found = 0; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*data)); + e820_update_range(pa_data, sizeof(*data)+data->len, + E820_RAM, E820_RESERVED_KERN); + found = 1; + pa_data = data->next; + early_iounmap(data, sizeof(*data)); + } + if (!found) + return; + + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + memcpy(&e820_saved, &e820, sizeof(struct e820map)); + printk(KERN_INFO "extended physical RAM map:\n"); + e820_print_map("reserve setup_data"); +} - /* alloc_bootmem zeroes memory */ - cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); - for (i = 0; i < nr_cpu_ids; i++) - cpu_set(i, cpumask_of_cpu_map[i]); +static void __init reserve_early_setup_data(void) +{ + struct setup_data *data; + u64 pa_data; + char buf[32]; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*data)); + sprintf(buf, "setup data %x", data->type); + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); + pa_data = data->next; + early_iounmap(data, sizeof(*data)); + } +} + +/* + * --------- Crashkernel reservation ------------------------------ + */ + +#ifdef CONFIG_KEXEC + +/** + * Reserve @size bytes of crashkernel memory at any suitable offset. + * + * @size: Size of the crashkernel memory to reserve. + * Returns the base address on success, and -1ULL on failure. + */ +unsigned long long find_and_reserve_crashkernel(unsigned long long size) +{ + const unsigned long long alignment = 16<<20; /* 16M */ + unsigned long long start = 0LL; + + while (1) { + int ret; + + start = find_e820_area(start, ULONG_MAX, size, alignment); + if (start == -1ULL) + return start; + + /* try to reserve it */ + ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); + if (ret >= 0) + return start; + + start += alignment; + } +} + +static inline unsigned long long get_total_mem(void) +{ + unsigned long long total; + + total = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + total += highend_pfn - highstart_pfn; +#endif + + return total << PAGE_SHIFT; +} + +static void __init reserve_crashkernel(void) +{ + unsigned long long total_mem; + unsigned long long crash_size, crash_base; + int ret; + + total_mem = get_total_mem(); + + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret != 0 || crash_size <= 0) + return; + + /* 0 means: find the address automatically */ + if (crash_base <= 0) { + crash_base = find_and_reserve_crashkernel(crash_size); + if (crash_base == -1ULL) { + pr_info("crashkernel reservation failed. " + "No suitable area found.\n"); + return; + } + } else { + ret = reserve_bootmem_generic(crash_base, crash_size, + BOOTMEM_EXCLUSIVE); + if (ret < 0) { + pr_info("crashkernel reservation failed - " + "memory is in use\n"); + return; + } + } + + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); } #else -static inline void setup_cpumask_of_cpu(void) { } +static void __init reserve_crashkernel(void) +{ +} #endif -#ifdef CONFIG_X86_32 -/* - * Great future not-so-futuristic plan: make i386 and x86_64 do it - * the same way +static struct resource standard_io_resources[] = { + { .name = "dma1", .start = 0x00, .end = 0x1f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "pic1", .start = 0x20, .end = 0x21, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "timer0", .start = 0x40, .end = 0x43, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "timer1", .start = 0x50, .end = 0x53, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "keyboard", .start = 0x60, .end = 0x60, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "keyboard", .start = 0x64, .end = 0x64, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "dma page reg", .start = 0x80, .end = 0x8f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "pic2", .start = 0xa0, .end = 0xa1, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "dma2", .start = 0xc0, .end = 0xdf, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "fpu", .start = 0xf0, .end = 0xff, + .flags = IORESOURCE_BUSY | IORESOURCE_IO } +}; + +static void __init reserve_standard_io_resources(void) +{ + int i; + + /* request I/O space for devices used on all i[345]86 PCs */ + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + +} + +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. This option will be passed + * by kexec loader to the capture kernel. */ -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(__per_cpu_offset); +static int __init setup_elfcorehdr(char *arg) +{ + char *end; + if (!arg) + return -EINVAL; + elfcorehdr_addr = memparse(arg, &end); + return end > arg ? 0 : -EINVAL; +} +early_param("elfcorehdr", setup_elfcorehdr); #endif /* - * Great future plan: - * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. - * Always point %gs to its beginning + * Determine if we were loaded by an EFI loader. If so, then we have also been + * passed the efi memmap, systab, etc., so we should use these data structures + * for initialization. Note, the efi init code path is determined by the + * global efi_enabled. This allows the same kernel image to be used on existing + * systems (with a traditional BIOS) as well as on EFI systems. + */ +/* + * setup_arch - architecture-specific boot-time initializations + * + * Note: On x86_64, fixmaps are ready for use even before this is called. */ -void __init setup_per_cpu_areas(void) + +void __init setup_arch(char **cmdline_p) { - int i, highest_cpu = 0; - unsigned long size; +#ifdef CONFIG_X86_32 + memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); + pre_setup_arch_hook(); + early_cpu_init(); +#else + printk(KERN_INFO "Command line: %s\n", boot_command_line); +#endif -#ifdef CONFIG_HOTPLUG_CPU - prefill_possible_map(); + early_ioremap_init(); + + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; +#ifdef CONFIG_X86_32 + apm_info.bios = boot_params.apm_bios_info; + ist_info = boot_params.ist_info; + if (boot_params.sys_desc_table.length != 0) { + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); + machine_id = boot_params.sys_desc_table.table[0]; + machine_submodel_id = boot_params.sys_desc_table.table[1]; + BIOS_revision = boot_params.sys_desc_table.table[2]; + } +#endif + saved_video_mode = boot_params.hdr.vid_mode; + bootloader_type = boot_params.hdr.type_of_loader; + +#ifdef CONFIG_BLK_DEV_RAM + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); +#endif +#ifdef CONFIG_EFI + if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, +#ifdef CONFIG_X86_32 + "EL32", +#else + "EL64", +#endif + 4)) { + efi_enabled = 1; + efi_reserve_early(); + } #endif - /* Copy section for each CPU (we discard the original) */ - size = PERCPU_ENOUGH_ROOM; - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", - size); + ARCH_SETUP - for_each_possible_cpu(i) { - char *ptr; -#ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = alloc_bootmem_pages(size); + setup_memory_map(); + parse_setup_data(); + /* update the e820_saved too */ + e820_reserve_setup_data(); + + copy_edd(); + + if (!boot_params.hdr.root_flags) + root_mountflags &= ~MS_RDONLY; + init_mm.start_code = (unsigned long) _text; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; +#ifdef CONFIG_X86_32 + init_mm.brk = init_pg_tables_end + PAGE_OFFSET; #else - int node = early_cpu_to_node(i); - if (!node_online(node) || !NODE_DATA(node)) { - ptr = alloc_bootmem_pages(size); - printk(KERN_INFO - "cpu %d has no node or node-local memory\n", i); - } - else - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + init_mm.brk = (unsigned long) &_end; #endif - if (!ptr) - panic("Cannot allocate cpu data for CPU %d\n", i); + + code_resource.start = virt_to_phys(_text); + code_resource.end = virt_to_phys(_etext)-1; + data_resource.start = virt_to_phys(_etext); + data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; + #ifdef CONFIG_X86_64 - cpu_pda(i)->data_offset = ptr - __per_cpu_start; -#else - __per_cpu_offset[i] = ptr - __per_cpu_start; + early_cpu_init(); #endif - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + parse_early_param(); - highest_cpu = i; + /* after early param, so could get panic from serial */ + reserve_early_setup_data(); + + if (acpi_mps_check()) { +#ifdef CONFIG_X86_LOCAL_APIC + disable_apic = 1; +#endif + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); } - nr_cpu_ids = highest_cpu + 1; - printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids); + finish_e820_parsing(); - /* Setup percpu data maps */ - setup_per_cpu_maps(); +#ifdef CONFIG_X86_32 + probe_roms(); +#endif - /* Setup cpumask_of_cpu map */ - setup_cpumask_of_cpu(); -} + /* after parse_early_param, so could debug it */ + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); + + if (efi_enabled) + efi_init(); +#ifdef CONFIG_X86_32 + if (ppro_with_ram_bug()) { + e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, + E820_RESERVED); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + printk(KERN_INFO "fixed physical RAM map:\n"); + e820_print_map("bad_ppro"); + } +#else + early_gart_iommu_check(); +#endif + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + max_pfn = e820_end(); + + /* preallocate 4k for mptable mpc */ + early_reserve_e820_mpc_new(); + /* update e820 for memory not covered by WB MTRRs */ + mtrr_bp_init(); + if (mtrr_trim_uncached_memory(max_pfn)) + max_pfn = e820_end(); + +#ifdef CONFIG_X86_32 + /* max_low_pfn get updated here */ + find_low_pfn_range(); +#else + num_physpages = max_pfn; + + check_efer(); + + /* How many end-of-memory variables you have, grandma! */ + /* need this before calling reserve_initrd */ + max_low_pfn = max_pfn; + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; +#endif + + /* max_pfn_mapped is updated here */ + max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT)); + + /* + * NOTE: On x86-32, only from this point on, fixmaps are ready for use. + */ + +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + if (init_ohci1394_dma_early) + init_ohci1394_dma_on_all_controllers(); +#endif + + reserve_initrd(); + +#ifdef CONFIG_X86_64 + vsmp_init(); +#endif + + dmi_scan_machine(); + + io_delay_init(); + + /* + * Parse the ACPI tables for possible boot-time SMP configuration. + */ + acpi_boot_table_init(); + +#ifdef CONFIG_ACPI_NUMA + /* + * Parse SRAT to discover nodes. + */ + acpi_numa_init(); +#endif + + initmem_init(0, max_pfn); + +#ifdef CONFIG_X86_64 + dma32_reserve_bootmem(); +#endif + +#ifdef CONFIG_ACPI_SLEEP + /* + * Reserve low memory region for sleep support. + */ + acpi_reserve_bootmem(); +#endif +#ifdef CONFIG_X86_FIND_SMP_CONFIG + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); #endif + reserve_crashkernel(); + + reserve_ibft_region(); + +#ifdef CONFIG_KVM_CLOCK + kvmclock_init(); +#endif + +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be after max_low_pfn is determined, and before kernel + * pagetables are setup. + */ + vmi_init(); +#endif + + paging_init(); + +#ifdef CONFIG_X86_64 + map_vsyscall(); +#endif + +#ifdef CONFIG_X86_GENERICARCH + generic_apic_probe(); +#endif + + early_quirks(); + + /* + * Read APIC and some other early information from ACPI tables. + */ + acpi_boot_init(); + +#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + get_smp_config(); +#endif + + prefill_possible_map(); +#ifdef CONFIG_X86_64 + init_cpu_to_node(); +#endif + + init_apic_mappings(); + ioapic_init_mappings(); + +#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) + if (def_to_bigsmp) + printk(KERN_WARNING "More than 8 CPUs detected and " + "CONFIG_X86_PC cannot handle it.\nUse " + "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); +#endif + kvm_guest_init(); + + e820_reserve_resources(); + e820_mark_nosave_regions(max_low_pfn); + +#ifdef CONFIG_X86_32 + request_resource(&iomem_resource, &video_ram_resource); +#endif + reserve_standard_io_resources(); + + e820_setup_gap(); + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) + conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif +} diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c deleted file mode 100644 index aee0e8200777..000000000000 --- a/arch/x86/kernel/setup64.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * X86-64 specific CPU setup. - * Copyright (C) 1995 Linus Torvalds - * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. - * See setup.c for older changelog. - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/bootmem.h> -#include <linux/bitops.h> -#include <linux/module.h> -#include <linux/kgdb.h> -#include <asm/pda.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/desc.h> -#include <asm/atomic.h> -#include <asm/mmu_context.h> -#include <asm/smp.h> -#include <asm/i387.h> -#include <asm/percpu.h> -#include <asm/proto.h> -#include <asm/sections.h> -#include <asm/setup.h> -#include <asm/genapic.h> - -#ifndef CONFIG_DEBUG_BOOT_PARAMS -struct boot_params __initdata boot_params; -#else -struct boot_params boot_params; -#endif - -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(_cpu_pda); -struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; - -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; - -char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); - -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata = 0; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32 = 0; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - -void pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); - - /* Setup up data that may be needed in __get_free_pages early */ - asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = - (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); - } - - - pda->irqstackptr += IRQSTACKSIZE-64; -} - -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ] -__attribute__((section(".bss.page_aligned"))); - -extern asmlinkage void ignore_sysret(void); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) -{ - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to set CS/DS - * but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); - -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init (); -#endif - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); -} - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) { - __supported_pte_mask &= ~_PAGE_NX; - } -} - -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init. - */ -void __cpuinit cpu_init (void) -{ - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; - char *estacks = NULL; - struct task_struct *me; - int i; - - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) { - pda_init(cpu); - } else - estacks = boot_exception_stacks; - - me = current; - - if (cpu_test_and_set(cpu, cpu_initialized)) - panic("CPU#%d already initialized!\n", cpu); - - printk("Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - if (cpu) - memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE); - - cpu_gdt_descr[cpu].size = GDT_SIZE; - load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]); - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - check_efer(); - - /* - * set up and load the per-CPU TSS - */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER - }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); - } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - if (me->mm) - BUG(); - enter_lazy_tlb(&init_mm, me); - - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else { -#endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ - } -#endif - - fpu_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c deleted file mode 100644 index 5a2f8e063887..000000000000 --- a/arch/x86/kernel/setup_32.c +++ /dev/null @@ -1,964 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * - * Memory region support - * David Parsons <[email protected]>, July-August 1999 - * - * Added E820 sanitization routine (removes overlapping memory regions); - * Brian Moyle <[email protected]>, February 2001 - * - * Moved CPU detection code to cpu/${cpu}.c - * Patrick Mochel <[email protected]>, March 2002 - * - * Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach <[email protected]>, December 2002. - * - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/mmzone.h> -#include <linux/screen_info.h> -#include <linux/ioport.h> -#include <linux/acpi.h> -#include <linux/apm_bios.h> -#include <linux/initrd.h> -#include <linux/bootmem.h> -#include <linux/seq_file.h> -#include <linux/console.h> -#include <linux/mca.h> -#include <linux/root_dev.h> -#include <linux/highmem.h> -#include <linux/module.h> -#include <linux/efi.h> -#include <linux/init.h> -#include <linux/edd.h> -#include <linux/iscsi_ibft.h> -#include <linux/nodemask.h> -#include <linux/kexec.h> -#include <linux/crash_dump.h> -#include <linux/dmi.h> -#include <linux/pfn.h> -#include <linux/pci.h> -#include <linux/init_ohci1394_dma.h> -#include <linux/kvm_para.h> - -#include <video/edid.h> - -#include <asm/mtrr.h> -#include <asm/apic.h> -#include <asm/e820.h> -#include <asm/mpspec.h> -#include <asm/mmzone.h> -#include <asm/setup.h> -#include <asm/arch_hooks.h> -#include <asm/sections.h> -#include <asm/io_apic.h> -#include <asm/ist.h> -#include <asm/io.h> -#include <asm/vmi.h> -#include <setup_arch.h> -#include <asm/bios_ebda.h> -#include <asm/cacheflush.h> -#include <asm/processor.h> - -/* This value is set up by the early boot code to point to the value - immediately after the boot time page tables. It contains a *physical* - address, and must not be in the .bss segment! */ -unsigned long init_pg_tables_end __initdata = ~0UL; - -/* - * Machine setup.. - */ -static struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x0060, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0064, - .end = 0x0064, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - -/* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; -/* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; -EXPORT_SYMBOL(boot_cpu_data); - -unsigned int def_to_bigsmp; - -#ifndef CONFIG_X86_PAE -unsigned long mmu_cr4_features; -#else -unsigned long mmu_cr4_features = X86_CR4_PAE; -#endif - -/* for MCA, but anyone else can use it if they want */ -unsigned int machine_id; -unsigned int machine_submodel_id; -unsigned int BIOS_revision; - -/* Boot loader ID as an integer, for the benefit of proc_dointvec */ -int bootloader_type; - -/* user-defined highmem size */ -static unsigned int highmem_pages = -1; - -/* - * Setup options - */ -struct screen_info screen_info; -EXPORT_SYMBOL(screen_info); -struct apm_info apm_info; -EXPORT_SYMBOL(apm_info); -struct edid_info edid_info; -EXPORT_SYMBOL_GPL(edid_info); -struct ist_info ist_info; -#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ - defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) -EXPORT_SYMBOL(ist_info); -#endif - -extern void early_cpu_init(void); -extern int root_mountflags; - -unsigned long saved_video_mode; - -#define RAMDISK_IMAGE_START_MASK 0x07FF -#define RAMDISK_PROMPT_FLAG 0x8000 -#define RAMDISK_LOAD_FLAG 0x4000 - -static char __initdata command_line[COMMAND_LINE_SIZE]; - -#ifndef CONFIG_DEBUG_BOOT_PARAMS -struct boot_params __initdata boot_params; -#else -struct boot_params boot_params; -#endif - -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) -struct edd edd; -#ifdef CONFIG_EDD_MODULE -EXPORT_SYMBOL(edd); -#endif -/** - * copy_edd() - Copy the BIOS EDD information - * from boot_params into a safe place. - * - */ -static inline void copy_edd(void) -{ - memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, - sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); - edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; - edd.edd_info_nr = boot_params.eddbuf_entries; -} -#else -static inline void copy_edd(void) -{ -} -#endif - -int __initdata user_defined_memmap; - -/* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to <mem>, overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * <start> to <start>+<mem>, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] - */ -static int __init parse_mem(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "nopentium") == 0) { - setup_clear_cpu_cap(X86_FEATURE_PSE); - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; - - mem_size = memparse(arg, &arg); - limit_regions(mem_size); - user_defined_memmap = 1; - } - return 0; -} -early_param("mem", parse_mem); - -#ifdef CONFIG_PROC_VMCORE -/* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. - */ -static int __init parse_elfcorehdr(char *arg) -{ - if (!arg) - return -EINVAL; - - elfcorehdr_addr = memparse(arg, &arg); - return 0; -} -early_param("elfcorehdr", parse_elfcorehdr); -#endif /* CONFIG_PROC_VMCORE */ - -/* - * highmem=size forces highmem to be exactly 'size' bytes. - * This works even on boxes that have no highmem otherwise. - * This also works to reduce highmem size on bigger boxes. - */ -static int __init parse_highmem(char *arg) -{ - if (!arg) - return -EINVAL; - - highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; - return 0; -} -early_param("highmem", parse_highmem); - -/* - * vmalloc=size forces the vmalloc area to be exactly 'size' - * bytes. This can be used to increase (or decrease) the - * vmalloc area - the default is 128m. - */ -static int __init parse_vmalloc(char *arg) -{ - if (!arg) - return -EINVAL; - - __VMALLOC_RESERVE = memparse(arg, &arg); - return 0; -} -early_param("vmalloc", parse_vmalloc); - -/* - * reservetop=size reserves a hole at the top of the kernel address space which - * a hypervisor can load into later. Needed for dynamically loaded hypervisors, - * so relocating the fixmap can be done before paging initialization. - */ -static int __init parse_reservetop(char *arg) -{ - unsigned long address; - - if (!arg) - return -EINVAL; - - address = memparse(arg, &arg); - reserve_top_address(address); - return 0; -} -early_param("reservetop", parse_reservetop); - -/* - * Determine low and high memory ranges: - */ -unsigned long __init find_max_low_pfn(void) -{ - unsigned long max_low_pfn; - - max_low_pfn = max_pfn; - if (max_low_pfn > MAXMEM_PFN) { - if (highmem_pages == -1) - highmem_pages = max_pfn - MAXMEM_PFN; - if (highmem_pages + MAXMEM_PFN < max_pfn) - max_pfn = MAXMEM_PFN + highmem_pages; - if (highmem_pages + MAXMEM_PFN > max_pfn) { - printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages)); - highmem_pages = 0; - } - max_low_pfn = MAXMEM_PFN; -#ifndef CONFIG_HIGHMEM - /* Maximum memory usable is what is directly addressable */ - printk(KERN_WARNING "Warning only %ldMB will be used.\n", - MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); - max_pfn = MAXMEM_PFN; -#else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_HIGHMEM64G - if (max_pfn > MAX_NONPAE_PFN) { - max_pfn = MAX_NONPAE_PFN; - printk(KERN_WARNING "Warning only 4GB will be used.\n"); - printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); - } -#endif /* !CONFIG_HIGHMEM64G */ -#endif /* !CONFIG_HIGHMEM */ - } else { - if (highmem_pages == -1) - highmem_pages = 0; -#ifdef CONFIG_HIGHMEM - if (highmem_pages >= max_pfn) { - printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); - highmem_pages = 0; - } - if (highmem_pages) { - if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){ - printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages)); - highmem_pages = 0; - } - max_low_pfn -= highmem_pages; - } -#else - if (highmem_pages) - printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); -#endif - } - return max_low_pfn; -} - -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -static void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); -} - -#ifndef CONFIG_NEED_MULTIPLE_NODES -static void __init setup_bootmem_allocator(void); -static unsigned long __init setup_memory(void) -{ - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - min_low_pfn = PFN_UP(init_pg_tables_end); - - max_low_pfn = find_max_low_pfn(); - -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > max_low_pfn) { - highstart_pfn = max_low_pfn; - } - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - num_physpages = max_low_pfn; - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif -#ifdef CONFIG_FLATMEM - max_mapnr = num_physpages; -#endif - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); - - setup_bootmem_allocator(); - - return max_low_pfn; -} - -static void __init zone_sizes_init(void) -{ - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; -#ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; - add_active_range(0, 0, highend_pfn); -#else - add_active_range(0, 0, max_low_pfn); -#endif - - free_area_init_nodes(max_zone_pfns); -} -#else -extern unsigned long __init setup_memory(void); -extern void zone_sizes_init(void); -#endif /* !CONFIG_NEED_MULTIPLE_NODES */ - -static inline unsigned long long get_total_mem(void) -{ - unsigned long long total; - - total = max_low_pfn - min_low_pfn; -#ifdef CONFIG_HIGHMEM - total += highend_pfn - highstart_pfn; -#endif - - return total << PAGE_SHIFT; -} - -#ifdef CONFIG_KEXEC -static void __init reserve_crashkernel(void) -{ - unsigned long long total_mem; - unsigned long long crash_size, crash_base; - int ret; - - total_mem = get_total_mem(); - - ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); - if (ret == 0 && crash_size > 0) { - if (crash_base > 0) { - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crash_base >> 20), - (unsigned long)(total_mem >> 20)); - - if (reserve_bootmem(crash_base, crash_size, - BOOTMEM_EXCLUSIVE) < 0) { - printk(KERN_INFO "crashkernel reservation " - "failed - memory is in use\n"); - return; - } - - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - } else - printk(KERN_INFO "crashkernel reservation failed - " - "you have to specify a base address\n"); - } -} -#else -static inline void __init reserve_crashkernel(void) -{} -#endif - -#ifdef CONFIG_BLK_DEV_INITRD - -static bool do_relocate_initrd = false; - -static void __init reserve_initrd(void) -{ - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; - unsigned long ramdisk_here; - - initrd_start = 0; - - if (!boot_params.hdr.type_of_loader || - !ramdisk_image || !ramdisk_size) - return; /* No initrd provided by bootloader */ - - if (ramdisk_end < ramdisk_image) { - printk(KERN_ERR "initrd wraps around end of memory, " - "disabling initrd\n"); - return; - } - if (ramdisk_size >= end_of_lowmem/2) { - printk(KERN_ERR "initrd too large to handle, " - "disabling initrd\n"); - return; - } - if (ramdisk_end <= end_of_lowmem) { - /* All in lowmem, easy case */ - reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); - initrd_start = ramdisk_image + PAGE_OFFSET; - initrd_end = initrd_start+ramdisk_size; - return; - } - - /* We need to move the initrd down into lowmem */ - ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; - - /* Note: this includes all the lowmem currently occupied by - the initrd, we rely on that fact to keep the data intact. */ - reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT); - initrd_start = ramdisk_here + PAGE_OFFSET; - initrd_end = initrd_start + ramdisk_size; - - do_relocate_initrd = true; -} - -#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) - -static void __init relocate_initrd(void) -{ - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; - unsigned long ramdisk_here; - unsigned long slop, clen, mapaddr; - char *p, *q; - - if (!do_relocate_initrd) - return; - - ramdisk_here = initrd_start - PAGE_OFFSET; - - q = (char *)initrd_start; - - /* Copy any lowmem portion of the initrd */ - if (ramdisk_image < end_of_lowmem) { - clen = end_of_lowmem - ramdisk_image; - p = (char *)__va(ramdisk_image); - memcpy(q, p, clen); - q += clen; - ramdisk_image += clen; - ramdisk_size -= clen; - } - - /* Copy the highmem portion of the initrd */ - while (ramdisk_size) { - slop = ramdisk_image & ~PAGE_MASK; - clen = ramdisk_size; - if (clen > MAX_MAP_CHUNK-slop) - clen = MAX_MAP_CHUNK-slop; - mapaddr = ramdisk_image & PAGE_MASK; - p = early_ioremap(mapaddr, clen+slop); - memcpy(q, p+slop, clen); - early_iounmap(p, clen+slop); - q += clen; - ramdisk_image += clen; - ramdisk_size -= clen; - } -} - -#endif /* CONFIG_BLK_DEV_INITRD */ - -void __init setup_bootmem_allocator(void) -{ - unsigned long bootmap_size; - /* - * Initialize the boot-time allocator (with low memory only): - */ - bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); - - register_bootmem_low_pages(max_low_pfn); - - /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. - */ - reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text), - BOOTMEM_DEFAULT); - - /* - * reserve physical page 0 - it's a special BIOS page on many boxes, - * enabling clean reboots, SMP operation, laptop functions. - */ - reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT); - - /* reserve EBDA region */ - reserve_ebda_region(); - -#ifdef CONFIG_SMP - /* - * But first pinch a few for the stack/trampoline stuff - * FIXME: Don't need the extra page at 4K, but need to fix - * trampoline before removing it. (see the GDT stuff) - */ - reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT); -#endif -#ifdef CONFIG_ACPI_SLEEP - /* - * Reserve low memory region for sleep support. - */ - acpi_reserve_bootmem(); -#endif -#ifdef CONFIG_X86_FIND_SMP_CONFIG - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); -#endif -#ifdef CONFIG_BLK_DEV_INITRD - reserve_initrd(); -#endif - numa_kva_reserve(); - reserve_crashkernel(); - - reserve_ibft_region(); -} - -/* - * The node 0 pgdat is initialized before all of these because - * it's needed for bootmem. node>0 pgdats have their virtual - * space allocated before the pagetables are in place to access - * them, so they can't be cleared then. - * - * This should all compile down to nothing when NUMA is off. - */ -static void __init remapped_pgdat_init(void) -{ - int nid; - - for_each_online_node(nid) { - if (nid != 0) - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - } -} - -#ifdef CONFIG_MCA -static void set_mca_bus(int x) -{ - MCA_bus = x; -} -#else -static void set_mca_bus(int x) { } -#endif - -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ -char * __init __attribute__((weak)) memory_setup(void) -{ - return machine_specific_memory_setup(); -} - -#ifdef CONFIG_NUMA -/* - * In the golden day, when everything among i386 and x86_64 will be - * integrated, this will not live here - */ -void *x86_cpu_to_node_map_early_ptr; -int x86_cpu_to_node_map_init[NR_CPUS] = { - [0 ... NR_CPUS-1] = NUMA_NO_NODE -}; -DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; -#endif - -/* - * Determine if we were loaded by an EFI loader. If so, then we have also been - * passed the efi memmap, systab, etc., so we should use these data structures - * for initialization. Note, the efi init code path is determined by the - * global efi_enabled. This allows the same kernel image to be used on existing - * systems (with a traditional BIOS) as well as on EFI systems. - */ -void __init setup_arch(char **cmdline_p) -{ - unsigned long max_low_pfn; - - memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); - pre_setup_arch_hook(); - early_cpu_init(); - early_ioremap_init(); - -#ifdef CONFIG_EFI - if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL32", 4)) - efi_enabled = 1; -#endif - - ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); - screen_info = boot_params.screen_info; - edid_info = boot_params.edid_info; - apm_info.bios = boot_params.apm_bios_info; - ist_info = boot_params.ist_info; - saved_video_mode = boot_params.hdr.vid_mode; - if( boot_params.sys_desc_table.length != 0 ) { - set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); - machine_id = boot_params.sys_desc_table.table[0]; - machine_submodel_id = boot_params.sys_desc_table.table[1]; - BIOS_revision = boot_params.sys_desc_table.table[2]; - } - bootloader_type = boot_params.hdr.type_of_loader; - -#ifdef CONFIG_BLK_DEV_RAM - rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); -#endif - ARCH_SETUP - - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(memory_setup()); - - copy_edd(); - - if (!boot_params.hdr.root_flags) - root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = init_pg_tables_end + PAGE_OFFSET; - - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); - data_resource.end = virt_to_phys(_edata)-1; - bss_resource.start = virt_to_phys(&__bss_start); - bss_resource.end = virt_to_phys(&__bss_stop)-1; - - parse_early_param(); - - if (user_defined_memmap) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; - - if (efi_enabled) - efi_init(); - - /* update e820 for memory not covered by WB MTRRs */ - propagate_e820_map(); - mtrr_bp_init(); - if (mtrr_trim_uncached_memory(max_pfn)) - propagate_e820_map(); - - max_low_pfn = setup_memory(); - -#ifdef CONFIG_KVM_CLOCK - kvmclock_init(); -#endif - -#ifdef CONFIG_VMI - /* - * Must be after max_low_pfn is determined, and before kernel - * pagetables are setup. - */ - vmi_init(); -#endif - kvm_guest_init(); - - /* - * NOTE: before this point _nobody_ is allowed to allocate - * any memory using the bootmem allocator. Although the - * allocator is now initialised only the first 8Mb of the kernel - * virtual address space has been mapped. All allocations before - * paging_init() has completed must use the alloc_bootmem_low_pages() - * variant (which allocates DMA'able memory) and care must be taken - * not to exceed the 8Mb limit. - */ - -#ifdef CONFIG_SMP - smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ -#endif - paging_init(); - - /* - * NOTE: On x86-32, only from this point on, fixmaps are ready for use. - */ - -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - if (init_ohci1394_dma_early) - init_ohci1394_dma_on_all_controllers(); -#endif - - remapped_pgdat_init(); - sparse_init(); - zone_sizes_init(); - - /* - * NOTE: at this point the bootmem allocator is fully available. - */ - -#ifdef CONFIG_BLK_DEV_INITRD - relocate_initrd(); -#endif - - paravirt_post_allocator_init(); - - dmi_scan_machine(); - - io_delay_init(); - -#ifdef CONFIG_X86_SMP - /* - * setup to use the early static init tables during kernel startup - * X86_SMP will exclude sub-arches that don't deal well with it. - */ - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; -#endif -#endif - -#ifdef CONFIG_X86_GENERICARCH - generic_apic_probe(); -#endif - -#ifdef CONFIG_ACPI - /* - * Parse the ACPI tables for possible boot-time SMP configuration. - */ - acpi_boot_table_init(); -#endif - - early_quirks(); - -#ifdef CONFIG_ACPI - acpi_boot_init(); - -#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) - if (def_to_bigsmp) - printk(KERN_WARNING "More than 8 CPUs detected and " - "CONFIG_X86_PC cannot handle it.\nUse " - "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); -#endif -#endif -#ifdef CONFIG_X86_LOCAL_APIC - if (smp_found_config) - get_smp_config(); -#endif - - e820_register_memory(); - e820_mark_nosave_regions(); - -#ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif -} - -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk(KERN_INFO "Setting up standard PCI resources\n"); - init_iomem_resources(&code_resource, &data_resource, &bss_resource); - - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c deleted file mode 100644 index 6dff1286ad8a..000000000000 --- a/arch/x86/kernel/setup_64.c +++ /dev/null @@ -1,1194 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/slab.h> -#include <linux/user.h> -#include <linux/screen_info.h> -#include <linux/ioport.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/initrd.h> -#include <linux/highmem.h> -#include <linux/bootmem.h> -#include <linux/module.h> -#include <asm/processor.h> -#include <linux/console.h> -#include <linux/seq_file.h> -#include <linux/crash_dump.h> -#include <linux/root_dev.h> -#include <linux/pci.h> -#include <asm/pci-direct.h> -#include <linux/efi.h> -#include <linux/acpi.h> -#include <linux/kallsyms.h> -#include <linux/edd.h> -#include <linux/iscsi_ibft.h> -#include <linux/mmzone.h> -#include <linux/kexec.h> -#include <linux/cpufreq.h> -#include <linux/dmi.h> -#include <linux/dma-mapping.h> -#include <linux/ctype.h> -#include <linux/sort.h> -#include <linux/uaccess.h> -#include <linux/init_ohci1394_dma.h> -#include <linux/kvm_para.h> - -#include <asm/mtrr.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/vsyscall.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/msr.h> -#include <asm/desc.h> -#include <video/edid.h> -#include <asm/e820.h> -#include <asm/dma.h> -#include <asm/gart.h> -#include <asm/mpspec.h> -#include <asm/mmu_context.h> -#include <asm/proto.h> -#include <asm/setup.h> -#include <asm/numa.h> -#include <asm/sections.h> -#include <asm/dmi.h> -#include <asm/cacheflush.h> -#include <asm/mce.h> -#include <asm/ds.h> -#include <asm/topology.h> -#include <asm/trampoline.h> -#include <asm/pat.h> - -#include <mach_apic.h> -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define ARCH_SETUP -#endif - -/* - * Machine setup.. - */ - -struct cpuinfo_x86 boot_cpu_data __read_mostly; -EXPORT_SYMBOL(boot_cpu_data); - -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; - -unsigned long mmu_cr4_features; - -/* Boot loader ID as an integer, for the benefit of proc_dointvec */ -int bootloader_type; - -unsigned long saved_video_mode; - -int force_mwait __cpuinitdata; - -/* - * Early DMI memory - */ -int dmi_alloc_index; -char dmi_alloc_data[DMI_MAX_DATA]; - -/* - * Setup options - */ -struct screen_info screen_info; -EXPORT_SYMBOL(screen_info); -struct sys_desc_table_struct { - unsigned short length; - unsigned char table[0]; -}; - -struct edid_info edid_info; -EXPORT_SYMBOL_GPL(edid_info); - -extern int root_mountflags; - -char __initdata command_line[COMMAND_LINE_SIZE]; - -static struct resource standard_io_resources[] = { - { .name = "dma1", .start = 0x00, .end = 0x1f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "pic1", .start = 0x20, .end = 0x21, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "timer0", .start = 0x40, .end = 0x43, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "timer1", .start = 0x50, .end = 0x53, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "keyboard", .start = 0x60, .end = 0x60, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "keyboard", .start = 0x64, .end = 0x64, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "dma page reg", .start = 0x80, .end = 0x8f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "pic2", .start = 0xa0, .end = 0xa1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "dma2", .start = 0xc0, .end = 0xdf, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "fpu", .start = 0xf0, .end = 0xff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO } -}; - -#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) - -static struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_RAM, -}; -static struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_RAM, -}; -static struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_RAM, -}; - -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); - -#ifdef CONFIG_PROC_VMCORE -/* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. This option will be passed - * by kexec loader to the capture kernel. - */ -static int __init setup_elfcorehdr(char *arg) -{ - char *end; - if (!arg) - return -EINVAL; - elfcorehdr_addr = memparse(arg, &end); - return end > arg ? 0 : -EINVAL; -} -early_param("elfcorehdr", setup_elfcorehdr); -#endif - -#ifndef CONFIG_NUMA -static void __init -contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long bootmap_size, bootmap; - - bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; - bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, - PAGE_SIZE); - if (bootmap == -1L) - panic("Cannot find bootmem map of size %ld\n", bootmap_size); - bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); - e820_register_active_regions(0, start_pfn, end_pfn); - free_bootmem_with_active_regions(0, end_pfn); - early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); - reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); -} -#endif - -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) -struct edd edd; -#ifdef CONFIG_EDD_MODULE -EXPORT_SYMBOL(edd); -#endif -/** - * copy_edd() - Copy the BIOS EDD information - * from boot_params into a safe place. - * - */ -static inline void copy_edd(void) -{ - memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, - sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); - edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; - edd.edd_info_nr = boot_params.eddbuf_entries; -} -#else -static inline void copy_edd(void) -{ -} -#endif - -#ifdef CONFIG_KEXEC -static void __init reserve_crashkernel(void) -{ - unsigned long long total_mem; - unsigned long long crash_size, crash_base; - int ret; - - total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; - - ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); - if (ret == 0 && crash_size) { - if (crash_base <= 0) { - printk(KERN_INFO "crashkernel reservation failed - " - "you have to specify a base address\n"); - return; - } - - if (reserve_bootmem(crash_base, crash_size, - BOOTMEM_EXCLUSIVE) < 0) { - printk(KERN_INFO "crashkernel reservation failed - " - "memory is in use\n"); - return; - } - - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crash_base >> 20), - (unsigned long)(total_mem >> 20)); - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); - } -} -#else -static inline void __init reserve_crashkernel(void) -{} -#endif - -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ -void __attribute__((weak)) __init memory_setup(void) -{ - machine_specific_memory_setup(); -} - -static void __init parse_setup_data(void) -{ - struct setup_data *data; - unsigned long pa_data; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_ioremap(pa_data, PAGE_SIZE); - switch (data->type) { - default: - break; - } -#ifndef CONFIG_DEBUG_BOOT_PARAMS - free_early(pa_data, pa_data+sizeof(*data)+data->len); -#endif - pa_data = data->next; - early_iounmap(data, PAGE_SIZE); - } -} - -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) -{ -} -void __init check_enable_amd_mmconf_dmi(void) -{ -} -#endif - -/* - * setup_arch - architecture-specific boot-time initializations - * - * Note: On x86_64, fixmaps are ready for use even before this is called. - */ -void __init setup_arch(char **cmdline_p) -{ - unsigned i; - - printk(KERN_INFO "Command line: %s\n", boot_command_line); - - ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); - screen_info = boot_params.screen_info; - edid_info = boot_params.edid_info; - saved_video_mode = boot_params.hdr.vid_mode; - bootloader_type = boot_params.hdr.type_of_loader; - -#ifdef CONFIG_BLK_DEV_RAM - rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); -#endif -#ifdef CONFIG_EFI - if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL64", 4)) - efi_enabled = 1; -#endif - - ARCH_SETUP - - memory_setup(); - copy_edd(); - - if (!boot_params.hdr.root_flags) - root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) &_text; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; - - code_resource.start = virt_to_phys(&_text); - code_resource.end = virt_to_phys(&_etext)-1; - data_resource.start = virt_to_phys(&_etext); - data_resource.end = virt_to_phys(&_edata)-1; - bss_resource.start = virt_to_phys(&__bss_start); - bss_resource.end = virt_to_phys(&__bss_stop)-1; - - early_identify_cpu(&boot_cpu_data); - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; - - parse_setup_data(); - - parse_early_param(); - -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - if (init_ohci1394_dma_early) - init_ohci1394_dma_on_all_controllers(); -#endif - - finish_e820_parsing(); - - /* after parse_early_param, so could debug it */ - insert_resource(&iomem_resource, &code_resource); - insert_resource(&iomem_resource, &data_resource); - insert_resource(&iomem_resource, &bss_resource); - - early_gart_iommu_check(); - - e820_register_active_regions(0, 0, -1UL); - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - end_pfn = e820_end_of_ram(); - /* update e820 for memory not covered by WB MTRRs */ - mtrr_bp_init(); - if (mtrr_trim_uncached_memory(end_pfn)) { - e820_register_active_regions(0, 0, -1UL); - end_pfn = e820_end_of_ram(); - } - - num_physpages = end_pfn; - - check_efer(); - - max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); - if (efi_enabled) - efi_init(); - - vsmp_init(); - - dmi_scan_machine(); - - io_delay_init(); - -#ifdef CONFIG_KVM_CLOCK - kvmclock_init(); -#endif - -#ifdef CONFIG_SMP - /* setup to use the early static init tables during kernel startup */ - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; -#endif -#endif - -#ifdef CONFIG_ACPI - /* - * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). - * Call this early for SRAT node setup. - */ - acpi_boot_table_init(); -#endif - - /* How many end-of-memory variables you have, grandma! */ - max_low_pfn = end_pfn; - max_pfn = end_pfn; - high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1; - - /* Remove active ranges so rediscovery with NUMA-awareness happens */ - remove_all_active_ranges(); - -#ifdef CONFIG_ACPI_NUMA - /* - * Parse SRAT to discover nodes. - */ - acpi_numa_init(); -#endif - -#ifdef CONFIG_NUMA - numa_initmem_init(0, end_pfn); -#else - contig_initmem_init(0, end_pfn); -#endif - - dma32_reserve_bootmem(); - -#ifdef CONFIG_ACPI_SLEEP - /* - * Reserve low memory region for sleep support. - */ - acpi_reserve_bootmem(); -#endif - - if (efi_enabled) - efi_reserve_bootmem(); - - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); -#ifdef CONFIG_BLK_DEV_INITRD - if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; - unsigned long end_of_mem = end_pfn << PAGE_SHIFT; - - if (ramdisk_end <= end_of_mem) { - /* - * don't need to reserve again, already reserved early - * in x86_64_start_kernel, and early_res_to_bootmem - * convert that to reserved in bootmem - */ - initrd_start = ramdisk_image + PAGE_OFFSET; - initrd_end = initrd_start+ramdisk_size; - } else { - free_bootmem(ramdisk_image, ramdisk_size); - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - ramdisk_end, end_of_mem); - initrd_start = 0; - } - } -#endif - reserve_crashkernel(); - - reserve_ibft_region(); - - paging_init(); - map_vsyscall(); - - early_quirks(); - -#ifdef CONFIG_ACPI - /* - * Read APIC and some other early information from ACPI tables. - */ - acpi_boot_init(); -#endif - - init_cpu_to_node(); - - /* - * get boot-time SMP configuration: - */ - if (smp_found_config) - get_smp_config(); - init_apic_mappings(); - ioapic_init_mappings(); - - kvm_guest_init(); - - /* - * We trust e820 completely. No explicit ROM probing in memory. - */ - e820_reserve_resources(); - e820_mark_nosave_regions(); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - - e820_setup_gap(); - -#ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif - - /* do this before identify_cpu for boot cpu */ - check_enable_amd_mmconf_dmi(); -} - -static int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - - if (c->extended_cpuid_level < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - return 1; -} - - -static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, eax, ebx, ecx, edx; - - n = c->extended_cpuid_level; - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " - "D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24) + (edx>>24); - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; - } - - if (n >= 0x80000006) { - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - ecx = cpuid_ecx(0x80000006); - c->x86_cache_size = ecx >> 16; - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - c->x86_cache_size, ecx & 0xFF); - } - if (n >= 0x80000008) { - cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } -} - -#ifdef CONFIG_NUMA -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; - -#ifdef CONFIG_NUMA - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; - -#endif -} - -#define ENABLE_C1E_MASK 0x18000000 -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) -{ - u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) - return 1; - break; - default: - /* err on the side of caution */ - return 1; - } - return 0; -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ - unsigned level; - -#ifdef CONFIG_SMP - unsigned long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 15) { - rdmsrl(MSR_K8_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); - } -#endif - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_cpu_cap(c, 0*32+31); - - /* On C+ stepping K8 rep microcode works well for copy/memset */ - level = cpuid_eax(1); - if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || - level >= 0x0f58)) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - level = get_model_name(c); - if (!level) { - switch (c->x86) { - case 15: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) - amd_detect_cmp(c); - - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - - if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - - if (c->x86 == 0x10) - fam10h_check_enable_mmcfg(); - - if (amd_apic_timer_broken()) - disable_apic_timer = 1; - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && - (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) - set_memory_4k((unsigned long)__va(tseg), 1); - } -} - -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(index_msb); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); - } -out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } - -#endif -} - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, t; - - if (c->cpuid_level < 4) - return 1; - - cpuid_count(4, 0, &eax, &t, &t, &t); - - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit srat_detect_node(void) -{ -#ifdef CONFIG_NUMA - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - /* Cache sizes */ - unsigned n; - - init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - } - - if (cpu_has_ds) { - unsigned int l1, l2; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - } - - - if (cpu_has_bts) - ds_init_intel(c); - - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - /* CPUID workaround for Intel 0F34 CPU */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 0xF && c->x86_model == 0x3 && - c->x86_mask == 0x4) - c->x86_phys_bits = 36; - } - - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); - - srat_detect_node(); -} - -static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) -{ - if (c->x86 == 0x6 && c->x86_model >= 0xf) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) -{ - /* Cache sizes */ - unsigned n; - - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } - - if (c->x86 == 0x6 && c->x86_model >= 0xf) { - c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -} - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - - if (!strcmp(v, "AuthenticAMD")) - c->x86_vendor = X86_VENDOR_AMD; - else if (!strcmp(v, "GenuineIntel")) - c->x86_vendor = X86_VENDOR_INTEL; - else if (!strcmp(v, "CentaurHauls")) - c->x86_vendor = X86_VENDOR_CENTAUR; - else - c->x86_vendor = X86_VENDOR_UNKNOWN; -} - -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; - c->x86_max_cores = 1; - c->x86_coreid_bits = 0; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c); - - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - __u32 misc; - cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], - &c->x86_capability[0]); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; -#endif - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ - } - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - - c->extended_cpuid_level = cpuid_eax(0x80000000); - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - early_init_amd(c); - break; - case X86_VENDOR_INTEL: - early_init_intel(c); - break; - case X86_VENDOR_CENTAUR: - early_init_centaur(c); - break; - } - - validate_pat_support(c); -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - early_identify_cpu(c); - - init_scattered_cpuid_features(c); - - c->apicid = phys_pkg_id(0); - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - init_amd(c); - break; - - case X86_VENDOR_INTEL: - init_intel(c); - break; - - case X86_VENDOR_CENTAUR: - init_centaur(c); - break; - - case X86_VENDOR_UNKNOWN: - default: - display_cacheinfo(c); - break; - } - - detect_ht(c); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if (c != &boot_cpu_data) { - /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - -#ifdef CONFIG_X86_MCE - mcheck_init(c); -#endif - select_idle_routine(c); - -#ifdef CONFIG_NUMA - numa_add_cpu(smp_processor_id()); -#endif - -} - -void __cpuinit identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); - mtrr_ap_init(); -} - -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); - else - printk(KERN_CONT "\n"); -} - -static __init int setup_disablecpuid(char *arg) -{ - int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; -} -__setup("clearcpuid=", setup_disablecpuid); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c new file mode 100644 index 000000000000..5fc310f746fc --- /dev/null +++ b/arch/x86/kernel/setup_percpu.c @@ -0,0 +1,399 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/percpu.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <asm/smp.h> +#include <asm/percpu.h> +#include <asm/sections.h> +#include <asm/processor.h> +#include <asm/setup.h> +#include <asm/topology.h> +#include <asm/mpspec.h> +#include <asm/apicdef.h> +#include <asm/highmem.h> + +#ifdef CONFIG_X86_LOCAL_APIC +unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +unsigned int max_physical_apicid; +EXPORT_SYMBOL(boot_cpu_physical_apicid); + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; +#endif + +/* map cpu index to physical APIC ID */ +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +#define X86_64_NUMA 1 + +/* map cpu index to node index */ +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); + +/* which logical CPUs are on which nodes */ +cpumask_t *node_to_cpumask_map; +EXPORT_SYMBOL(node_to_cpumask_map); + +/* setup node_to_cpumask_map */ +static void __init setup_node_to_cpumask_map(void); + +#else +static inline void setup_node_to_cpumask_map(void) { } +#endif + +#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) +/* + * Copy data used in early init routines from the initial arrays to the + * per cpu data areas. These arrays then become expendable and the + * *_early_ptr's are zeroed indicating that the static arrays are gone. + */ +static void __init setup_per_cpu_maps(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); + per_cpu(x86_bios_cpu_apicid, cpu) = + early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#ifdef X86_64_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif + } + + /* indicate the early static arrays will soon be gone */ + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#ifdef X86_64_NUMA + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; +#endif +} + +#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP +cpumask_t *cpumask_of_cpu_map __read_mostly; +EXPORT_SYMBOL(cpumask_of_cpu_map); + +/* requires nr_cpu_ids to be initialized */ +static void __init setup_cpumask_of_cpu(void) +{ + int i; + + /* alloc_bootmem zeroes memory */ + cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); + for (i = 0; i < nr_cpu_ids; i++) + cpu_set(i, cpumask_of_cpu_map[i]); +} +#else +static inline void setup_cpumask_of_cpu(void) { } +#endif + +#ifdef CONFIG_X86_32 +/* + * Great future not-so-futuristic plan: make i386 and x86_64 do it + * the same way + */ +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_cpu_pda_map(void) { } + +#elif !defined(CONFIG_SMP) +static inline void setup_cpu_pda_map(void) { } + +#else /* CONFIG_SMP && CONFIG_X86_64 */ + +/* + * Allocate cpu_pda pointer table and array via alloc_bootmem. + */ +static void __init setup_cpu_pda_map(void) +{ + char *pda; + struct x8664_pda **new_cpu_pda; + unsigned long size; + int cpu; + + size = roundup(sizeof(struct x8664_pda), cache_line_size()); + + /* allocate cpu_pda array and pointer table */ + { + unsigned long tsize = nr_cpu_ids * sizeof(void *); + unsigned long asize = size * (nr_cpu_ids - 1); + + tsize = roundup(tsize, cache_line_size()); + new_cpu_pda = alloc_bootmem(tsize + asize); + pda = (char *)new_cpu_pda + tsize; + } + + /* initialize pointer table to static pda's */ + for_each_possible_cpu(cpu) { + if (cpu == 0) { + /* leave boot cpu pda in place */ + new_cpu_pda[0] = cpu_pda(0); + continue; + } + new_cpu_pda[cpu] = (struct x8664_pda *)pda; + new_cpu_pda[cpu]->in_bootmem = 1; + pda += size; + } + + /* point to new pointer table */ + _cpu_pda = new_cpu_pda; +} +#endif + +/* + * Great future plan: + * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. + * Always point %gs to its beginning + */ +void __init setup_per_cpu_areas(void) +{ + ssize_t size = PERCPU_ENOUGH_ROOM; + char *ptr; + int cpu; + + /* Setup cpu_pda map */ + setup_cpu_pda_map(); + + /* Copy section for each CPU (we discard the original) */ + size = PERCPU_ENOUGH_ROOM; + printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", + size); + + for_each_possible_cpu(cpu) { +#ifndef CONFIG_NEED_MULTIPLE_NODES + ptr = alloc_bootmem_pages(size); +#else + int node = early_cpu_to_node(cpu); + if (!node_online(node) || !NODE_DATA(node)) { + ptr = alloc_bootmem_pages(size); + printk(KERN_INFO + "cpu %d has no node %d or node-local memory\n", + cpu, node); + } + else + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); +#endif + per_cpu_offset(cpu) = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + } + + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", + NR_CPUS, nr_cpu_ids, nr_node_ids); + + /* Setup percpu data maps */ + setup_per_cpu_maps(); + + /* Setup node to cpumask map */ + setup_node_to_cpumask_map(); + + /* Setup cpumask_of_cpu map */ + setup_cpumask_of_cpu(); +} + +#endif + +#ifdef X86_64_NUMA + +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: node_to_cpumask() is not valid until after this is done. + */ +static void __init setup_node_to_cpumask_map(void) +{ + unsigned int node, num = 0; + cpumask_t *map; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) { + for_each_node_mask(node, node_possible_map) + num = node; + nr_node_ids = num + 1; + } + + /* allocate the map */ + map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + + Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", + map, nr_node_ids); + + /* node_to_cpumask() will now work */ + node_to_cpumask_map = map; +} + +void __cpuinit numa_set_node(int cpu, int node) +{ + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); + + if (cpu_pda(cpu) && node != NUMA_NO_NODE) + cpu_pda(cpu)->nodenumber = node; + + if (cpu_to_node_map) + cpu_to_node_map[cpu] = node; + + else if (per_cpu_offset(cpu)) + per_cpu(x86_cpu_to_node_map, cpu) = node; + + else + Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); +} + +void __cpuinit numa_clear_node(int cpu) +{ + numa_set_node(cpu, NUMA_NO_NODE); +} + +#ifndef CONFIG_DEBUG_PER_CPU_MAPS + +void __cpuinit numa_add_cpu(int cpu) +{ + cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); +} + +#else /* CONFIG_DEBUG_PER_CPU_MAPS */ + +/* + * --------- debug versions of the numa functions --------- + */ +static void __cpuinit numa_set_cpumask(int cpu, int enable) +{ + int node = cpu_to_node(cpu); + cpumask_t *mask; + char buf[64]; + + if (node_to_cpumask_map == NULL) { + printk(KERN_ERR "node_to_cpumask_map NULL\n"); + dump_stack(); + return; + } + + mask = &node_to_cpumask_map[node]; + if (enable) + cpu_set(cpu, *mask); + else + cpu_clear(cpu, *mask); + + cpulist_scnprintf(buf, sizeof(buf), *mask); + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); + } + +void __cpuinit numa_add_cpu(int cpu) +{ + numa_set_cpumask(cpu, 1); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + numa_set_cpumask(cpu, 0); +} + +int cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { + printk(KERN_WARNING + "cpu_to_node(%d): usage too early!\n", cpu); + dump_stack(); + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +EXPORT_SYMBOL(cpu_to_node); + +/* + * Same function as cpu_to_node() but used if called before the + * per_cpu areas are setup. + */ +int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + if (!per_cpu_offset(cpu)) { + printk(KERN_WARNING + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); + dump_stack(); + return NUMA_NO_NODE; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} + + +/* empty cpumask */ +static const cpumask_t cpu_mask_none; + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +cpumask_t *_node_to_cpumask_ptr(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", + node); + dump_stack(); + return &cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return (cpumask_t *)&cpu_mask_none; + } + return (cpumask_t *)&node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(_node_to_cpumask_ptr); + +/* + * Returns a bitmask of CPUs on Node 'node'. + * + * Side note: this function creates the returned cpumask on the stack + * so with a high NR_CPUS count, excessive stack space is used. The + * node_to_cpumask_ptr function should be used whenever possible. + */ +cpumask_t node_to_cpumask(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); + dump_stack(); + return cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "node_to_cpumask(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return cpu_mask_none; + } + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(node_to_cpumask); + +/* + * --------- end of debug versions of the numa functions --------- + */ + +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ + +#endif /* X86_64_NUMA */ + diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 56078d61c793..f35c2d8016ac 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -59,7 +59,6 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mtrr.h> -#include <asm/nmi.h> #include <asm/vmi.h> #include <asm/genapic.h> #include <linux/mc146818rtc.h> @@ -68,22 +67,6 @@ #include <mach_wakecpu.h> #include <smpboot_hooks.h> -/* - * FIXME: For x86_64, those are defined in other files. But moving them here, - * would make the setup areas dependent on smp, which is a loss. When we - * integrate apic between arches, we can probably do a better job, but - * right now, they'll stay here -- glommer - */ - -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = - { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_cpu_to_apicid_early_ptr; - -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata - = { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_bios_cpu_apicid_early_ptr; - #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; static int low_mappings; @@ -198,13 +181,12 @@ static void map_cpu_to_logical_apicid(void) map_cpu_to_node(cpu, node); } -static void unmap_cpu_to_logical_apicid(int cpu) +void numa_remove_cpu(int cpu) { cpu_2_logical_apicid[cpu] = BAD_APICID; unmap_cpu_to_node(cpu); } #else -#define unmap_cpu_to_logical_apicid(cpu) do {} while (0) #define map_cpu_to_logical_apicid() do {} while (0) #endif @@ -346,15 +328,8 @@ static void __cpuinit start_secondary(void *unused) * smp_call_function(). */ lock_ipi_call_lock(); -#ifdef CONFIG_X86_64 - spin_lock(&vector_lock); - - /* Setup the per cpu irq handling data structures */ - __setup_vector_irq(smp_processor_id()); - /* - * Allow the master to continue. - */ - spin_unlock(&vector_lock); +#ifdef CONFIG_X86_IO_APIC + setup_vector_irq(smp_processor_id()); #endif cpu_set(smp_processor_id(), cpu_online_map); unlock_ipi_call_lock(); @@ -366,31 +341,8 @@ static void __cpuinit start_secondary(void *unused) cpu_idle(); } -#ifdef CONFIG_X86_32 -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __devinit initialize_secondary(void) -{ - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the ip. - */ - - asm volatile( - "movl %0,%%esp\n\t" - "jmp *%1" - : - :"m" (current->thread.sp), "m" (current->thread.ip)); -} -#endif - static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_32 /* * Mask B, Pentium, but not Pentium MMX */ @@ -440,7 +392,6 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) valid_k7: ; -#endif } static void __cpuinit smp_checks(void) @@ -555,23 +506,6 @@ cpumask_t cpu_coregroup_map(int cpu) return c->llc_shared_map; } -#ifdef CONFIG_X86_32 -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); -} -#endif - static void impress_friends(void) { int cpu; @@ -748,11 +682,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) * target processor state. */ startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, -#ifdef CONFIG_X86_64 - (unsigned long)init_rsp); -#else (unsigned long)stack_start.sp); -#endif /* * Run STARTUP IPI loop. @@ -832,6 +762,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } +#ifdef CONFIG_X86_64 +/* + * Allocate node local memory for the AP pda. + * + * Must be called after the _cpu_pda pointer table is initialized. + */ +static int __cpuinit get_local_pda(int cpu) +{ + struct x8664_pda *oldpda, *newpda; + unsigned long size = sizeof(struct x8664_pda); + int node = cpu_to_node(cpu); + + if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) + return 0; + + oldpda = cpu_pda(cpu); + newpda = kmalloc_node(size, GFP_ATOMIC, node); + if (!newpda) { + printk(KERN_ERR "Could not allocate node local PDA " + "for CPU %d on node %d\n", cpu, node); + + if (oldpda) + return 0; /* have a usable pda */ + else + return -1; + } + + if (oldpda) { + memcpy(newpda, oldpda, size); + if (!after_bootmem) + free_bootmem((unsigned long)oldpda, size); + } + + newpda->in_bootmem = 0; + cpu_pda(cpu) = newpda; + return 0; +} +#endif /* CONFIG_X86_64 */ + static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -848,28 +817,14 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* allocate memory for gdts of secondary cpus. Hotplug is considered */ - if (!cpu_gdt_descr[cpu].address && - !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { - printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); - return -1; - } +#ifdef CONFIG_X86_64 /* Allocate node local memory for AP pdas */ - if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { - struct x8664_pda *newpda, *pda; - int node = cpu_to_node(cpu); - pda = cpu_pda(cpu); - newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC, - node); - if (newpda) { - memcpy(newpda, pda, sizeof(struct x8664_pda)); - cpu_pda(cpu) = newpda; - } else - printk(KERN_ERR - "Could not allocate node local PDA for CPU %d on node %d\n", - cpu, node); + if (cpu > 0) { + boot_error = get_local_pda(cpu); + if (boot_error) + goto restore_state; + /* if can't get pda memory, can't start cpu */ } #endif @@ -905,18 +860,15 @@ do_rest: #ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; init_gdt(cpu); - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - c_idle.idle->thread.ip = (unsigned long) start_secondary; /* Stack for startup_32 can be just as for start_secondary onwards */ - stack_start.sp = (void *) c_idle.idle->thread.sp; irq_ctx_init(cpu); #else cpu_pda(cpu)->pcurrent = c_idle.idle; - init_rsp = c_idle.idle->thread.sp; - load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); - initial_code = (unsigned long)start_secondary; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); #endif + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + initial_code = (unsigned long)start_secondary; + stack_start.sp = (void *) c_idle.idle->thread.sp; /* start_ip had better be page-aligned! */ start_ip = setup_trampoline(); @@ -987,16 +939,14 @@ do_rest: inquire_remote_apic(apicid); } } - - if (boot_error) { - /* Try to put things back the way they were before ... */ - unmap_cpu_to_logical_apicid(cpu); #ifdef CONFIG_X86_64 - clear_node_cpumask(cpu); /* was set by numa_add_cpu */ +restore_state: #endif + if (boot_error) { + /* Try to put things back the way they were before ... */ + numa_remove_cpu(cpu); /* was set by numa_add_cpu */ cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ - cpu_clear(cpu, cpu_possible_map); cpu_clear(cpu, cpu_present_map); per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; } @@ -1088,14 +1038,12 @@ static __init void disable_smp(void) { cpu_present_map = cpumask_of_cpu(0); cpu_possible_map = cpumask_of_cpu(0); -#ifdef CONFIG_X86_32 smpboot_clear_io_apic_irqs(); -#endif + if (smp_found_config) - phys_cpu_present_map = - physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); else - phys_cpu_present_map = physid_mask_of_physid(0); + physid_set_mask_of_physid(0, &phys_cpu_present_map); map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); @@ -1158,12 +1106,12 @@ static int __init smp_sanity_check(unsigned max_cpus) * If SMP should be disabled, then really disable it! */ if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated," - "forcing use of dummy APIC emulation.\n"); + printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); -#ifdef CONFIG_X86_32 + + localise_nmi_watchdog(); + connect_bsp_APIC(); -#endif setup_local_APIC(); end_local_APIC_setup(); return -1; @@ -1191,7 +1139,6 @@ static void __init smp_cpu_index_default(void) void __init native_smp_prepare_cpus(unsigned int max_cpus) { preempt_disable(); - nmi_watchdog_default(); smp_cpu_index_default(); current_cpu_data = boot_cpu_data; cpu_callin_map = cpumask_of_cpu(0); @@ -1218,9 +1165,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_enable(); -#ifdef CONFIG_X86_32 connect_bsp_APIC(); -#endif + /* * Switch from PIC to APIC mode. */ @@ -1258,8 +1204,8 @@ void __init native_smp_prepare_boot_cpu(void) int me = smp_processor_id(); #ifdef CONFIG_X86_32 init_gdt(me); - switch_to_new_gdt(); #endif + switch_to_new_gdt(); /* already set me in cpu_online_map in boot_cpu_init() */ cpu_set(me, cpu_callout_map); per_cpu(cpu_state, me) = CPU_ONLINE; @@ -1279,23 +1225,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_HOTPLUG_CPU -# ifdef CONFIG_X86_32 -void cpu_exit_clear(void) -{ - int cpu = raw_smp_processor_id(); - - idle_task_exit(); - - cpu_uninit(); - irq_ctx_exit(cpu); - - cpu_clear(cpu, cpu_callout_map); - cpu_clear(cpu, cpu_callin_map); - - unmap_cpu_to_logical_apicid(cpu); -} -# endif /* CONFIG_X86_32 */ - static void remove_siblinginfo(int cpu) { int sibling; @@ -1349,12 +1278,20 @@ __init void prefill_possible_map(void) int i; int possible; + /* no processor from mptable or madt */ + if (!num_processors) + num_processors = 1; + +#ifdef CONFIG_HOTPLUG_CPU if (additional_cpus == -1) { if (disabled_cpus > 0) additional_cpus = disabled_cpus; else additional_cpus = 0; } +#else + additional_cpus = 0; +#endif possible = num_processors + additional_cpus; if (possible > NR_CPUS) possible = NR_CPUS; @@ -1364,18 +1301,18 @@ __init void prefill_possible_map(void) for (i = 0; i < possible; i++) cpu_set(i, cpu_possible_map); + + nr_cpu_ids = possible; } static void __ref remove_cpu_from_maps(int cpu) { cpu_clear(cpu, cpu_online_map); -#ifdef CONFIG_X86_64 cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); /* was set by cpu_init() */ clear_bit(cpu, (unsigned long *)&cpu_initialized); - clear_node_cpumask(cpu); -#endif + numa_remove_cpu(cpu); } int __cpu_disable(void) diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ae751094eba9..d67ce5f044ba 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +#ifndef CONFIG_X86_NUMAQ static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; +#endif static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) { diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index d2ab52cc1d6b..7066cb855a60 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -19,8 +19,8 @@ #include <linux/utsname.h> #include <linux/ipc.h> -#include <asm/uaccess.h> -#include <asm/unistd.h> +#include <linux/uaccess.h> +#include <linux/unistd.h> asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -103,7 +103,7 @@ asmlinkage int old_select(struct sel_arg_struct __user *arg) * * This is really horribly ugly. */ -asmlinkage int sys_ipc (uint call, int first, int second, +asmlinkage int sys_ipc(uint call, int first, int second, int third, void __user *ptr, long fifth) { int version, ret; @@ -113,24 +113,24 @@ asmlinkage int sys_ipc (uint call, int first, int second, switch (call) { case SEMOP: - return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); + return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); case SEMTIMEDOP: return sys_semtimedop(first, (struct sembuf __user *)ptr, second, (const struct timespec __user *)fifth); case SEMGET: - return sys_semget (first, second, third); + return sys_semget(first, second, third); case SEMCTL: { union semun fourth; if (!ptr) return -EINVAL; if (get_user(fourth.__pad, (void __user * __user *) ptr)) return -EFAULT; - return sys_semctl (first, second, third, fourth); + return sys_semctl(first, second, third, fourth); } case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, + return sys_msgsnd(first, (struct msgbuf __user *) ptr, second, third); case MSGRCV: switch (version) { @@ -138,45 +138,45 @@ asmlinkage int sys_ipc (uint call, int first, int second, struct ipc_kludge tmp; if (!ptr) return -EINVAL; - + if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp))) + (struct ipc_kludge __user *) ptr, + sizeof(tmp))) return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, + return sys_msgrcv(first, tmp.msgp, second, tmp.msgtyp, third); } default: - return sys_msgrcv (first, + return sys_msgrcv(first, (struct msgbuf __user *) ptr, second, fifth, third); } case MSGGET: - return sys_msgget ((key_t) first, second); + return sys_msgget((key_t) first, second); case MSGCTL: - return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); + return sys_msgctl(first, second, (struct msqid_ds __user *) ptr); case SHMAT: switch (version) { default: { ulong raddr; - ret = do_shmat (first, (char __user *) ptr, second, &raddr); + ret = do_shmat(first, (char __user *) ptr, second, &raddr); if (ret) return ret; - return put_user (raddr, (ulong __user *) third); + return put_user(raddr, (ulong __user *) third); } case 1: /* iBCS2 emulator entry point */ if (!segment_eq(get_fs(), get_ds())) return -EINVAL; /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ - return do_shmat (first, (char __user *) ptr, second, (ulong *) third); + return do_shmat(first, (char __user *) ptr, second, (ulong *) third); } - case SHMDT: - return sys_shmdt ((char __user *)ptr); + case SHMDT: + return sys_shmdt((char __user *)ptr); case SHMGET: - return sys_shmget (first, second, third); + return sys_shmget(first, second, third); case SHMCTL: - return sys_shmctl (first, second, + return sys_shmctl(first, second, (struct shmid_ds __user *) ptr); default: return -ENOSYS; @@ -186,28 +186,28 @@ asmlinkage int sys_ipc (uint call, int first, int second, /* * Old cruft */ -asmlinkage int sys_uname(struct old_utsname __user * name) +asmlinkage int sys_uname(struct old_utsname __user *name) { int err; if (!name) return -EFAULT; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - return err?-EFAULT:0; + return err? -EFAULT:0; } -asmlinkage int sys_olduname(struct oldold_utsname __user * name) +asmlinkage int sys_olduname(struct oldold_utsname __user *name) { int error; if (!name) return -EFAULT; - if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) return -EFAULT; - - down_read(&uts_sem); - + + down_read(&uts_sem); + error = __copy_to_user(&name->sysname, &utsname()->sysname, __OLD_UTS_LEN); error |= __put_user(0, name->sysname + __OLD_UTS_LEN); @@ -223,9 +223,9 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name) error |= __copy_to_user(&name->machine, &utsname()->machine, __OLD_UTS_LEN); error |= __put_user(0, name->machine + __OLD_UTS_LEN); - + up_read(&uts_sem); - + error = error ? -EFAULT : 0; return error; @@ -241,6 +241,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]) long __res; asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" : "=a" (__res) - : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); + : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); return __res; } diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 2ff21f398934..059ca6ee59b4 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -39,9 +39,6 @@ #include "do_timer.h" -unsigned int cpu_khz; /* Detected as we calibrate the TSC */ -EXPORT_SYMBOL(cpu_khz); - int timer_ack; unsigned long profile_pc(struct pt_regs *regs) @@ -84,8 +81,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) if (timer_ack) { /* * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to reset the IRR bit for do_slow_gettimeoffset(). - * This will also deassert NMI lines for the watchdog if run + * manually to deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ spin_lock(&i8259A_lock); diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index c737849e2ef7..e3d49c553af2 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id) /* calibrate_cpu is used on systems with fixed rate TSCs to determine * processor frequency */ #define TICK_COUNT 100000000 -unsigned long __init native_calculate_cpu_khz(void) +unsigned long __init calibrate_cpu(void) { int tsc_start, tsc_now; int i, no_ctr_free; @@ -116,23 +116,11 @@ void __init hpet_time_init(void) void __init time_init(void) { - tsc_calibrate(); - - cpu_khz = tsc_khz; - if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) - cpu_khz = calculate_cpu_khz(); - - if (unsynchronized_tsc()) - mark_tsc_unstable("TSCs unsynchronized"); - + tsc_init(); if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) vgetcpu_mode = VGETCPU_RDTSCP; else vgetcpu_mode = VGETCPU_LSL; - printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - init_tsc_clocksource(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index a1f07d793202..5039d0f097a2 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -15,6 +15,8 @@ #include <asm/proto.h> #include <asm/apicdef.h> #include <asm/idle.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_bau.h> #include <mach_ipi.h> /* @@ -162,6 +164,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, union smp_flush_state *f; cpumask_t cpumask = *cpumaskp; + if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) + return; + /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; f = &per_cpu(flush_state, sender); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c new file mode 100644 index 000000000000..d0fbb7712ab0 --- /dev/null +++ b/arch/x86/kernel/tlb_uv.c @@ -0,0 +1,792 @@ +/* + * SGI UltraViolet TLB flush routines. + * + * (c) 2008 Cliff Wickman <[email protected]>, SGI. + * + * This code is released under the GNU General Public License version 2 or + * later. + */ +#include <linux/mc146818rtc.h> +#include <linux/proc_fs.h> +#include <linux/kernel.h> + +#include <asm/mmu_context.h> +#include <asm/uv/uv_mmrs.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_bau.h> +#include <asm/genapic.h> +#include <asm/idle.h> +#include <asm/tsc.h> + +#include <mach_apic.h> + +static struct bau_control **uv_bau_table_bases __read_mostly; +static int uv_bau_retry_limit __read_mostly; + +/* position of pnode (which is nasid>>1): */ +static int uv_nshift __read_mostly; + +static unsigned long uv_mmask __read_mostly; + +static DEFINE_PER_CPU(struct ptc_stats, ptcstats); +static DEFINE_PER_CPU(struct bau_control, bau_control); + +/* + * Free a software acknowledge hardware resource by clearing its Pending + * bit. This will return a reply to the sender. + * If the message has timed out, a reply has already been sent by the + * hardware but the resource has not been released. In that case our + * clear of the Timeout bit (as well) will free the resource. No reply will + * be sent (the hardware will only do one reply per message). + */ +static void uv_reply_to_message(int resource, + struct bau_payload_queue_entry *msg, + struct bau_msg_status *msp) +{ + unsigned long dw; + + dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); + msg->replied_to = 1; + msg->sw_ack_vector = 0; + if (msp) + msp->seen_by.bits = 0; + uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); +} + +/* + * Do all the things a cpu should do for a TLB shootdown message. + * Other cpu's may come here at the same time for this message. + */ +static void uv_bau_process_message(struct bau_payload_queue_entry *msg, + int msg_slot, int sw_ack_slot) +{ + unsigned long this_cpu_mask; + struct bau_msg_status *msp; + int cpu; + + msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; + cpu = uv_blade_processor_id(); + msg->number_of_cpus = + uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); + this_cpu_mask = 1UL << cpu; + if (msp->seen_by.bits & this_cpu_mask) + return; + atomic_or_long(&msp->seen_by.bits, this_cpu_mask); + + if (msg->replied_to == 1) + return; + + if (msg->address == TLB_FLUSH_ALL) { + local_flush_tlb(); + __get_cpu_var(ptcstats).alltlb++; + } else { + __flush_tlb_one(msg->address); + __get_cpu_var(ptcstats).onetlb++; + } + + __get_cpu_var(ptcstats).requestee++; + + atomic_inc_short(&msg->acknowledge_count); + if (msg->number_of_cpus == msg->acknowledge_count) + uv_reply_to_message(sw_ack_slot, msg, msp); +} + +/* + * Examine the payload queue on one distribution node to see + * which messages have not been seen, and which cpu(s) have not seen them. + * + * Returns the number of cpu's that have not responded. + */ +static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) +{ + struct bau_payload_queue_entry *msg; + struct bau_msg_status *msp; + int count = 0; + int i; + int j; + + for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; + msg++, i++) { + if ((msg->sending_cpu == sender) && (!msg->replied_to)) { + msp = bau_tablesp->msg_statuses + i; + printk(KERN_DEBUG + "blade %d: address:%#lx %d of %d, not cpu(s): ", + i, msg->address, msg->acknowledge_count, + msg->number_of_cpus); + for (j = 0; j < msg->number_of_cpus; j++) { + if (!((1L << j) & msp->seen_by.bits)) { + count++; + printk("%d ", j); + } + } + printk("\n"); + } + } + return count; +} + +/* + * Examine the payload queue on all the distribution nodes to see + * which messages have not been seen, and which cpu(s) have not seen them. + * + * Returns the number of cpu's that have not responded. + */ +static int uv_examine_destinations(struct bau_target_nodemask *distribution) +{ + int sender; + int i; + int count = 0; + + sender = smp_processor_id(); + for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { + if (!bau_node_isset(i, distribution)) + continue; + count += uv_examine_destination(uv_bau_table_bases[i], sender); + } + return count; +} + +/* + * wait for completion of a broadcast message + * + * return COMPLETE, RETRY or GIVEUP + */ +static int uv_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift) +{ + int exams = 0; + long destination_timeouts = 0; + long source_timeouts = 0; + unsigned long descriptor_status; + + while ((descriptor_status = (((unsigned long) + uv_read_local_mmr(mmr_offset) >> + right_shift) & UV_ACT_STATUS_MASK)) != + DESC_STATUS_IDLE) { + if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { + source_timeouts++; + if (source_timeouts > SOURCE_TIMEOUT_LIMIT) + source_timeouts = 0; + __get_cpu_var(ptcstats).s_retry++; + return FLUSH_RETRY; + } + /* + * spin here looking for progress at the destinations + */ + if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { + destination_timeouts++; + if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { + /* + * returns number of cpus not responding + */ + if (uv_examine_destinations + (&bau_desc->distribution) == 0) { + __get_cpu_var(ptcstats).d_retry++; + return FLUSH_RETRY; + } + exams++; + if (exams >= uv_bau_retry_limit) { + printk(KERN_DEBUG + "uv_flush_tlb_others"); + printk("giving up on cpu %d\n", + smp_processor_id()); + return FLUSH_GIVEUP; + } + /* + * delays can hang the simulator + udelay(1000); + */ + destination_timeouts = 0; + } + } + } + return FLUSH_COMPLETE; +} + +/** + * uv_flush_send_and_wait + * + * Send a broadcast and wait for a broadcast message to complete. + * + * The cpumaskp mask contains the cpus the broadcast was sent to. + * + * Returns 1 if all remote flushing was done. The mask is zeroed. + * Returns 0 if some remote flushing remains to be done. The mask is left + * unchanged. + */ +int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, + cpumask_t *cpumaskp) +{ + int completion_status = 0; + int right_shift; + int tries = 0; + int blade; + int bit; + unsigned long mmr_offset; + unsigned long index; + cycles_t time1; + cycles_t time2; + + if (cpu < UV_CPUS_PER_ACT_STATUS) { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; + right_shift = cpu * UV_ACT_STATUS_SIZE; + } else { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; + right_shift = + ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); + } + time1 = get_cycles(); + do { + tries++; + index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | + cpu; + uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); + completion_status = uv_wait_completion(bau_desc, mmr_offset, + right_shift); + } while (completion_status == FLUSH_RETRY); + time2 = get_cycles(); + __get_cpu_var(ptcstats).sflush += (time2 - time1); + if (tries > 1) + __get_cpu_var(ptcstats).retriesok++; + + if (completion_status == FLUSH_GIVEUP) { + /* + * Cause the caller to do an IPI-style TLB shootdown on + * the cpu's, all of which are still in the mask. + */ + __get_cpu_var(ptcstats).ptc_i++; + return 0; + } + + /* + * Success, so clear the remote cpu's from the mask so we don't + * use the IPI method of shootdown on them. + */ + for_each_cpu_mask(bit, *cpumaskp) { + blade = uv_cpu_to_blade_id(bit); + if (blade == this_blade) + continue; + cpu_clear(bit, *cpumaskp); + } + if (!cpus_empty(*cpumaskp)) + return 0; + return 1; +} + +/** + * uv_flush_tlb_others - globally purge translation cache of a virtual + * address or all TLB's + * @cpumaskp: mask of all cpu's in which the address is to be removed + * @mm: mm_struct containing virtual address range + * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * + * This is the entry point for initiating any UV global TLB shootdown. + * + * Purges the translation caches of all specified processors of the given + * virtual address, or purges all TLB's on specified processors. + * + * The caller has derived the cpumaskp from the mm_struct and has subtracted + * the local cpu from the mask. This function is called only if there + * are bits set in the mask. (e.g. flush_tlb_page()) + * + * The cpumaskp is converted into a nodemask of the nodes containing + * the cpus. + * + * Returns 1 if all remote flushing was done. + * Returns 0 if some remote flushing remains to be done. + */ +int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, + unsigned long va) +{ + int i; + int bit; + int blade; + int cpu; + int this_blade; + int locals = 0; + struct bau_desc *bau_desc; + + cpu = uv_blade_processor_id(); + this_blade = uv_numa_blade_id(); + bau_desc = __get_cpu_var(bau_control).descriptor_base; + bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; + + bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); + + i = 0; + for_each_cpu_mask(bit, *cpumaskp) { + blade = uv_cpu_to_blade_id(bit); + BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); + if (blade == this_blade) { + locals++; + continue; + } + bau_node_set(blade, &bau_desc->distribution); + i++; + } + if (i == 0) { + /* + * no off_node flushing; return status for local node + */ + if (locals) + return 0; + else + return 1; + } + __get_cpu_var(ptcstats).requestor++; + __get_cpu_var(ptcstats).ntargeted += i; + + bau_desc->payload.address = va; + bau_desc->payload.sending_cpu = smp_processor_id(); + + return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); +} + +/* + * The BAU message interrupt comes here. (registered by set_intr_gate) + * See entry_64.S + * + * We received a broadcast assist message. + * + * Interrupts may have been disabled; this interrupt could represent + * the receipt of several messages. + * + * All cores/threads on this node get this interrupt. + * The last one to see it does the s/w ack. + * (the resource will not be freed until noninterruptable cpus see this + * interrupt; hardware will timeout the s/w ack and reply ERROR) + */ +void uv_bau_message_interrupt(struct pt_regs *regs) +{ + struct bau_payload_queue_entry *va_queue_first; + struct bau_payload_queue_entry *va_queue_last; + struct bau_payload_queue_entry *msg; + struct pt_regs *old_regs = set_irq_regs(regs); + cycles_t time1; + cycles_t time2; + int msg_slot; + int sw_ack_slot; + int fw; + int count = 0; + unsigned long local_pnode; + + ack_APIC_irq(); + exit_idle(); + irq_enter(); + + time1 = get_cycles(); + + local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); + + va_queue_first = __get_cpu_var(bau_control).va_queue_first; + va_queue_last = __get_cpu_var(bau_control).va_queue_last; + + msg = __get_cpu_var(bau_control).bau_msg_head; + while (msg->sw_ack_vector) { + count++; + fw = msg->sw_ack_vector; + msg_slot = msg - va_queue_first; + sw_ack_slot = ffs(fw) - 1; + + uv_bau_process_message(msg, msg_slot, sw_ack_slot); + + msg++; + if (msg > va_queue_last) + msg = va_queue_first; + __get_cpu_var(bau_control).bau_msg_head = msg; + } + if (!count) + __get_cpu_var(ptcstats).nomsg++; + else if (count > 1) + __get_cpu_var(ptcstats).multmsg++; + + time2 = get_cycles(); + __get_cpu_var(ptcstats).dflush += (time2 - time1); + + irq_exit(); + set_irq_regs(old_regs); +} + +static void uv_enable_timeouts(void) +{ + int i; + int blade; + int last_blade; + int pnode; + int cur_cpu = 0; + unsigned long apicid; + + last_blade = -1; + for_each_online_node(i) { + blade = uv_node_to_blade_id(i); + if (blade == last_blade) + continue; + last_blade = blade; + apicid = per_cpu(x86_cpu_to_apicid, cur_cpu); + pnode = uv_blade_to_pnode(blade); + cur_cpu += uv_blade_nr_possible_cpus(i); + } +} + +static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) +{ + if (*offset < num_possible_cpus()) + return offset; + return NULL; +} + +static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) +{ + (*offset)++; + if (*offset < num_possible_cpus()) + return offset; + return NULL; +} + +static void uv_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +/* + * Display the statistics thru /proc + * data points to the cpu number + */ +static int uv_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *)data; + + if (!cpu) { + seq_printf(file, + "# cpu requestor requestee one all sretry dretry ptc_i "); + seq_printf(file, + "sw_ack sflush dflush sok dnomsg dmult starget\n"); + } + if (cpu < num_possible_cpus() && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", + cpu, stat->requestor, + stat->requestee, stat->onetlb, stat->alltlb, + stat->s_retry, stat->d_retry, stat->ptc_i); + seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", + uv_read_global_mmr64(uv_blade_to_pnode + (uv_cpu_to_blade_id(cpu)), + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), + stat->sflush, stat->dflush, + stat->retriesok, stat->nomsg, + stat->multmsg, stat->ntargeted); + } + + return 0; +} + +/* + * 0: display meaning of the statistics + * >0: retry limit + */ +static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, + size_t count, loff_t *data) +{ + long newmode; + char optstr[64]; + + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + if (strict_strtoul(optstr, 10, &newmode) < 0) { + printk(KERN_DEBUG "%s is invalid\n", optstr); + return -EINVAL; + } + + if (newmode == 0) { + printk(KERN_DEBUG "# cpu: cpu number\n"); + printk(KERN_DEBUG + "requestor: times this cpu was the flush requestor\n"); + printk(KERN_DEBUG + "requestee: times this cpu was requested to flush its TLBs\n"); + printk(KERN_DEBUG + "one: times requested to flush a single address\n"); + printk(KERN_DEBUG + "all: times requested to flush all TLB's\n"); + printk(KERN_DEBUG + "sretry: number of retries of source-side timeouts\n"); + printk(KERN_DEBUG + "dretry: number of retries of destination-side timeouts\n"); + printk(KERN_DEBUG + "ptc_i: times UV fell through to IPI-style flushes\n"); + printk(KERN_DEBUG + "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); + printk(KERN_DEBUG + "sflush_us: cycles spent in uv_flush_tlb_others()\n"); + printk(KERN_DEBUG + "dflush_us: cycles spent in handling flush requests\n"); + printk(KERN_DEBUG "sok: successes on retry\n"); + printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); + printk(KERN_DEBUG + "dmult: interrupts with multiple messages\n"); + printk(KERN_DEBUG "starget: nodes targeted\n"); + } else { + uv_bau_retry_limit = newmode; + printk(KERN_DEBUG "timeout retry limit:%d\n", + uv_bau_retry_limit); + } + + return count; +} + +static const struct seq_operations uv_ptc_seq_ops = { + .start = uv_ptc_seq_start, + .next = uv_ptc_seq_next, + .stop = uv_ptc_seq_stop, + .show = uv_ptc_seq_show +}; + +static int uv_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &uv_ptc_seq_ops); +} + +static const struct file_operations proc_uv_ptc_operations = { + .open = uv_ptc_proc_open, + .read = seq_read, + .write = uv_ptc_proc_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init uv_ptc_init(void) +{ + struct proc_dir_entry *proc_uv_ptc; + + if (!is_uv_system()) + return 0; + + if (!proc_mkdir("sgi_uv", NULL)) + return -EINVAL; + + proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); + if (!proc_uv_ptc) { + printk(KERN_ERR "unable to create %s proc entry\n", + UV_PTC_BASENAME); + remove_proc_entry("sgi_uv", NULL); + return -EINVAL; + } + proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; + return 0; +} + +/* + * begin the initialization of the per-blade control structures + */ +static struct bau_control * __init uv_table_bases_init(int blade, int node) +{ + int i; + int *ip; + struct bau_msg_status *msp; + struct bau_control *bau_tabp; + + bau_tabp = + kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); + BUG_ON(!bau_tabp); + + bau_tabp->msg_statuses = + kmalloc_node(sizeof(struct bau_msg_status) * + DEST_Q_SIZE, GFP_KERNEL, node); + BUG_ON(!bau_tabp->msg_statuses); + + for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) + bau_cpubits_clear(&msp->seen_by, (int) + uv_blade_nr_possible_cpus(blade)); + + bau_tabp->watching = + kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); + BUG_ON(!bau_tabp->watching); + + for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) + *ip = 0; + + uv_bau_table_bases[blade] = bau_tabp; + + return bau_tabp; +} + +/* + * finish the initialization of the per-blade control structures + */ +static void __init +uv_table_bases_finish(int blade, int node, int cur_cpu, + struct bau_control *bau_tablesp, + struct bau_desc *adp) +{ + struct bau_control *bcp; + int i; + + for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) { + bcp = (struct bau_control *)&per_cpu(bau_control, i); + + bcp->bau_msg_head = bau_tablesp->va_queue_first; + bcp->va_queue_first = bau_tablesp->va_queue_first; + bcp->va_queue_last = bau_tablesp->va_queue_last; + bcp->watching = bau_tablesp->watching; + bcp->msg_statuses = bau_tablesp->msg_statuses; + bcp->descriptor_base = adp; + } +} + +/* + * initialize the sending side's sending buffers + */ +static struct bau_desc * __init +uv_activation_descriptor_init(int node, int pnode) +{ + int i; + unsigned long pa; + unsigned long m; + unsigned long n; + unsigned long mmr_image; + struct bau_desc *adp; + struct bau_desc *ad2; + + adp = (struct bau_desc *) + kmalloc_node(16384, GFP_KERNEL, node); + BUG_ON(!adp); + + pa = __pa((unsigned long)adp); + n = pa >> uv_nshift; + m = pa & uv_mmask; + + mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); + if (mmr_image) { + uv_write_global_mmr64(pnode, (unsigned long) + UVH_LB_BAU_SB_DESCRIPTOR_BASE, + (n << UV_DESC_BASE_PNODE_SHIFT | m)); + } + + for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { + memset(ad2, 0, sizeof(struct bau_desc)); + ad2->header.sw_ack_flag = 1; + ad2->header.base_dest_nodeid = + uv_blade_to_pnode(uv_cpu_to_blade_id(0)); + ad2->header.command = UV_NET_ENDPOINT_INTD; + ad2->header.int_both = 1; + /* + * all others need to be set to zero: + * fairness chaining multilevel count replied_to + */ + } + return adp; +} + +/* + * initialize the destination side's receiving buffers + */ +static struct bau_payload_queue_entry * __init +uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) +{ + struct bau_payload_queue_entry *pqp; + char *cp; + + pqp = (struct bau_payload_queue_entry *) kmalloc_node( + (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), + GFP_KERNEL, node); + BUG_ON(!pqp); + + cp = (char *)pqp + 31; + pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); + bau_tablesp->va_queue_first = pqp; + uv_write_global_mmr64(pnode, + UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, + ((unsigned long)pnode << + UV_PAYLOADQ_PNODE_SHIFT) | + uv_physnodeaddr(pqp)); + uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, + uv_physnodeaddr(pqp)); + bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); + uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, + (unsigned long) + uv_physnodeaddr(bau_tablesp->va_queue_last)); + memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); + + return pqp; +} + +/* + * Initialization of each UV blade's structures + */ +static int __init uv_init_blade(int blade, int node, int cur_cpu) +{ + int pnode; + unsigned long pa; + unsigned long apicid; + struct bau_desc *adp; + struct bau_payload_queue_entry *pqp; + struct bau_control *bau_tablesp; + + bau_tablesp = uv_table_bases_init(blade, node); + pnode = uv_blade_to_pnode(blade); + adp = uv_activation_descriptor_init(node, pnode); + pqp = uv_payload_queue_init(node, pnode, bau_tablesp); + uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp); + /* + * the below initialization can't be in firmware because the + * messaging IRQ will be determined by the OS + */ + apicid = per_cpu(x86_cpu_to_apicid, cur_cpu); + pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); + if ((pa & 0xff) != UV_BAU_MESSAGE) { + uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, + ((apicid << 32) | UV_BAU_MESSAGE)); + } + return 0; +} + +/* + * Initialization of BAU-related structures + */ +static int __init uv_bau_init(void) +{ + int blade; + int node; + int nblades; + int last_blade; + int cur_cpu = 0; + + if (!is_uv_system()) + return 0; + + uv_bau_retry_limit = 1; + uv_nshift = uv_hub_info->n_val; + uv_mmask = (1UL << uv_hub_info->n_val) - 1; + nblades = 0; + last_blade = -1; + for_each_online_node(node) { + blade = uv_node_to_blade_id(node); + if (blade == last_blade) + continue; + last_blade = blade; + nblades++; + } + uv_bau_table_bases = (struct bau_control **) + kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); + BUG_ON(!uv_bau_table_bases); + + last_blade = -1; + for_each_online_node(node) { + blade = uv_node_to_blade_id(node); + if (blade == last_blade) + continue; + last_blade = blade; + uv_init_blade(blade, node, cur_cpu); + cur_cpu += uv_blade_nr_possible_cpus(blade); + } + set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); + uv_enable_timeouts(); + + return 0; +} +__initcall(uv_bau_init); +__initcall(uv_ptc_init); diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index abbf199adebb..1106fac6024d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -2,7 +2,7 @@ #include <asm/trampoline.h> -/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ +/* ready for x86_64 and x86 */ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); /* diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 08d752de4eee..8a768973c4f0 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1,5 +1,6 @@ /* * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs * * Pentium III FXSR, SSE support * Gareth Hughes <[email protected]>, May 2000 @@ -60,8 +61,6 @@ #include "mach_traps.h" -int panic_on_unrecovered_nmi; - DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); @@ -98,19 +97,22 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); +int panic_on_unrecovered_nmi; int kstack_depth_to_print = 24; static unsigned int code_bytes = 64; +static int ignore_nmis; +static int die_counter; void printk_address(unsigned long address, int reliable) { #ifdef CONFIG_KALLSYMS - char namebuf[KSYM_NAME_LEN]; unsigned long offset = 0; unsigned long symsize; const char *symname; - char reliab[4] = ""; - char *delim = ":"; char *modname; + char *delim = ":"; + char namebuf[KSYM_NAME_LEN]; + char reliab[4] = ""; symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); @@ -130,22 +132,23 @@ void printk_address(unsigned long address, int reliable) #endif } -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) +static inline int valid_stack_ptr(struct thread_info *tinfo, + void *p, unsigned int size) { - return p > (void *)tinfo && - p <= (void *)tinfo + THREAD_SIZE - size; + void *t = tinfo; + return p > t && p <= t + THREAD_SIZE - size; } /* The form of the top of the frame on the stack */ struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; + struct stack_frame *next_frame; + unsigned long return_address; }; static inline unsigned long print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data) { struct stack_frame *frame = (struct stack_frame *)bp; @@ -167,8 +170,6 @@ print_context_stack(struct thread_info *tinfo, return bp; } -#define MSG(msg) ops->warning(data, msg) - void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) @@ -178,7 +179,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; - stack = &dummy; if (task != current) stack = (unsigned long *)task->thread.sp; @@ -196,7 +196,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, } #endif - while (1) { + for (;;) { struct thread_info *context; context = (struct thread_info *) @@ -248,10 +248,10 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, }; static void @@ -351,15 +351,14 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { /* try starting at EIP */ ip = (u8 *)regs->ip; code_len = code_len - code_prologue + 1; } for (i = 0; i < code_len; i++, ip++) { if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { + probe_kernel_address(ip, c)) { printk(" Bad EIP value."); break; } @@ -384,8 +383,6 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } -static int die_counter; - int __kprobes __die(const char *str, struct pt_regs *regs, long err) { unsigned short ss; @@ -402,26 +399,22 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { - - show_registers(regs); - /* Executive summary in case the oops scrolled away */ - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss & 0xffff; - } - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); - print_symbol("%s", regs->ip); - printk(" SS:ESP %04x:%08lx\n", ss, sp); + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + return 1; - return 0; + show_registers(regs); + /* Executive summary in case the oops scrolled away */ + sp = (unsigned long) (®s->sp); + savesegment(ss, ss); + if (user_mode(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; } - - return 1; + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); + print_symbol("%s", regs->ip); + printk(" SS:ESP %04x:%08lx\n", ss, sp); + return 0; } /* @@ -546,7 +539,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ { \ trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + == NOTIFY_STOP) \ return; \ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -562,7 +555,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + == NOTIFY_STOP) \ return; \ do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ } @@ -571,7 +564,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + == NOTIFY_STOP) \ return; \ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ } @@ -586,27 +579,29 @@ void do_##name(struct pt_regs *regs, long error_code) \ info.si_addr = (void __user *)siaddr; \ trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + == NOTIFY_STOP) \ return; \ do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ } -DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) #ifndef CONFIG_KPROBES DO_VM86_ERROR(3, SIGTRAP, "int3", int3) #endif DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) -DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) -void __kprobes do_general_protection(struct pt_regs *regs, long error_code) +void __kprobes +do_general_protection(struct pt_regs *regs, long error_code) { + struct task_struct *tsk; struct thread_struct *thread; struct tss_struct *tss; int cpu; @@ -647,23 +642,24 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code) if (regs->flags & X86_VM_MASK) goto gp_in_vm86; + tsk = current; if (!user_mode(regs)) goto gp_in_kernel; - current->thread.error_code = error_code; - current->thread.trap_no = 13; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; - if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && - printk_ratelimit()) { + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx", - current->comm, task_pid_nr(current), - regs->ip, regs->sp, error_code); + "%s[%d] general protection ip:%lx sp:%lx error:%lx", + tsk->comm, task_pid_nr(tsk), + regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); } - force_sig(SIGSEGV, current); + force_sig(SIGSEGV, tsk); return; gp_in_vm86: @@ -672,14 +668,15 @@ gp_in_vm86: return; gp_in_kernel: - if (!fixup_exception(regs)) { - current->thread.error_code = error_code; - current->thread.trap_no = 13; - if (notify_die(DIE_GPF, "general protection fault", regs, + if (fixup_exception(regs)) + return; + + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); - } + return; + die("general protection fault", regs, error_code); } static notrace __kprobes void @@ -756,9 +753,9 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; spin_lock(&nmi_print_lock); @@ -767,10 +764,12 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) * to get a message out: */ bust_spinlocks(1); - printk(KERN_EMERG "%s", msg); + printk(KERN_EMERG "%s", str); printk(" on CPU%d, ip %08lx, registers:\n", smp_processor_id(), regs->ip); show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); @@ -790,14 +789,17 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; + int cpu; + + cpu = smp_processor_id(); - /* Only the BSP gets external NMIs from the system: */ - if (!smp_processor_id()) + /* Only the BSP gets external NMIs from the system. */ + if (!cpu) reason = get_nmi_reason(); if (!(reason & 0xc0)) { if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) + == NOTIFY_STOP) return; #ifdef CONFIG_X86_LOCAL_APIC /* @@ -806,7 +808,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) */ if (nmi_watchdog_tick(regs, reason)) return; - if (!do_nmi_callback(regs, smp_processor_id())) + if (!do_nmi_callback(regs, cpu)) unknown_nmi_error(reason, regs); #else unknown_nmi_error(reason, regs); @@ -816,6 +818,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) } if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; + + /* AK: following checks seem to be broken on modern chipsets. FIXME */ if (reason & 0x80) mem_parity_error(reason, regs); if (reason & 0x40) @@ -827,8 +831,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) reassert_nmi(); } -static int ignore_nmis; - notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { int cpu; @@ -913,7 +915,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) tsk->thread.debugctlmsr = 0; if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ if (regs->flags & X86_EFLAGS_IF) @@ -974,9 +976,8 @@ clear_TF_reenable: void math_error(void __user *ip) { struct task_struct *task; - unsigned short cwd; - unsigned short swd; siginfo_t info; + unsigned short cwd, swd; /* * Save the info for the exception handler and clear the error. @@ -995,7 +996,7 @@ void math_error(void __user *ip) * C1 reg you need in case of a stack fault, 0x040 is the stack * fault bit. We should only be taking one exception at a time, * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't syncronizing its FPU usage + * then we have a bad program that isn't synchronizing its FPU usage * and it will suffer the consequences since we won't be able to * fully reproduce the context of the exception */ @@ -1004,7 +1005,7 @@ void math_error(void __user *ip) switch (swd & ~cwd & 0x3f) { case 0x000: /* No unmasked exception */ return; - default: /* Multiple exceptions */ + default: /* Multiple exceptions */ break; case 0x001: /* Invalid Op */ /* @@ -1040,8 +1041,8 @@ void do_coprocessor_error(struct pt_regs *regs, long error_code) static void simd_math_error(void __user *ip) { struct task_struct *task; - unsigned short mxcsr; siginfo_t info; + unsigned short mxcsr; /* * Save the info for the exception handler and clear the error. @@ -1117,7 +1118,7 @@ void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) { - struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; + struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); unsigned long base = (kesp - uesp) & -THREAD_SIZE; unsigned long new_kesp = kesp - base; unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; @@ -1196,19 +1197,16 @@ void __init trap_init(void) early_iounmap(p, 4); #endif -#ifdef CONFIG_X86_LOCAL_APIC - init_apic_mappings(); -#endif - set_trap_gate(0, ÷_error); - set_intr_gate(1, &debug); - set_intr_gate(2, &nmi); - set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ - set_system_gate(4, &overflow); - set_trap_gate(5, &bounds); - set_trap_gate(6, &invalid_op); - set_trap_gate(7, &device_not_available); - set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); - set_trap_gate(9, &coprocessor_segment_overrun); + set_trap_gate(0, ÷_error); + set_intr_gate(1, &debug); + set_intr_gate(2, &nmi); + set_system_intr_gate(3, &int3); /* int3 can be called from all */ + set_system_gate(4, &overflow); /* int4 can be called from all */ + set_trap_gate(5, &bounds); + set_trap_gate(6, &invalid_op); + set_trap_gate(7, &device_not_available); + set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); + set_trap_gate(9, &coprocessor_segment_overrun); set_trap_gate(10, &invalid_TSS); set_trap_gate(11, &segment_not_present); set_trap_gate(12, &stack_segment); diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index adff76ea97c4..74e992957ff6 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -10,49 +10,49 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'entry.S'. */ -#include <linux/sched.h> +#include <linux/moduleparam.h> +#include <linux/interrupt.h> +#include <linux/kallsyms.h> +#include <linux/spinlock.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/utsname.h> +#include <linux/kdebug.h> #include <linux/kernel.h> +#include <linux/module.h> +#include <linux/ptrace.h> #include <linux/string.h> +#include <linux/unwind.h> +#include <linux/delay.h> #include <linux/errno.h> -#include <linux/ptrace.h> +#include <linux/kexec.h> +#include <linux/sched.h> #include <linux/timer.h> -#include <linux/mm.h> #include <linux/init.h> -#include <linux/delay.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> -#include <linux/kallsyms.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/nmi.h> -#include <linux/kprobes.h> -#include <linux/kexec.h> -#include <linux/unwind.h> -#include <linux/uaccess.h> #include <linux/bug.h> -#include <linux/kdebug.h> -#include <linux/utsname.h> - -#include <mach_traps.h> +#include <linux/nmi.h> +#include <linux/mm.h> #if defined(CONFIG_EDAC) #include <linux/edac.h> #endif -#include <asm/system.h> -#include <asm/io.h> -#include <asm/atomic.h> +#include <asm/stacktrace.h> +#include <asm/processor.h> #include <asm/debugreg.h> +#include <asm/atomic.h> +#include <asm/system.h> +#include <asm/unwind.h> #include <asm/desc.h> #include <asm/i387.h> -#include <asm/processor.h> -#include <asm/unwind.h> +#include <asm/nmi.h> #include <asm/smp.h> +#include <asm/io.h> #include <asm/pgalloc.h> -#include <asm/pda.h> #include <asm/proto.h> -#include <asm/nmi.h> -#include <asm/stacktrace.h> +#include <asm/pda.h> + +#include <mach_traps.h> asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -71,12 +71,15 @@ asmlinkage void general_protection(void); asmlinkage void page_fault(void); asmlinkage void coprocessor_error(void); asmlinkage void simd_coprocessor_error(void); -asmlinkage void reserved(void); asmlinkage void alignment_check(void); -asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); +int panic_on_unrecovered_nmi; +int kstack_depth_to_print = 12; static unsigned int code_bytes = 64; +static int ignore_nmis; +static int die_counter; static inline void conditional_sti(struct pt_regs *regs) { @@ -100,8 +103,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } -int kstack_depth_to_print = 12; - void printk_address(unsigned long address, int reliable) { #ifdef CONFIG_KALLSYMS @@ -204,8 +205,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, return NULL; } -#define MSG(txt) ops->warning(data, txt) - /* * x86-64 can have up to three kernel stacks: * process stack @@ -232,11 +231,11 @@ struct stack_frame { unsigned long return_address; }; - -static inline unsigned long print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end) +static inline unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end) { struct stack_frame *frame = (struct stack_frame *)bp; @@ -258,7 +257,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, return bp; } -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, +void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { @@ -267,36 +266,34 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned used = 0; struct thread_info *tinfo; - if (!tsk) - tsk = current; - tinfo = task_thread_info(tsk); + if (!task) + task = current; if (!stack) { unsigned long dummy; stack = &dummy; - if (tsk && tsk != current) - stack = (unsigned long *)tsk->thread.sp; + if (task && task != current) + stack = (unsigned long *)task->thread.sp; } #ifdef CONFIG_FRAME_POINTER if (!bp) { - if (tsk == current) { + if (task == current) { /* Grab bp right from our regs */ - asm("movq %%rbp, %0" : "=r" (bp):); + asm("movq %%rbp, %0" : "=r" (bp) :); } else { /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) tsk->thread.sp; + bp = *(unsigned long *) task->thread.sp; } } #endif - - /* * Print function call entries in all stacks, starting at the * current stack address. If the stacks consist of nested * exceptions */ + tinfo = task_thread_info(task); for (;;) { char *id; unsigned long *estack_end; @@ -381,18 +378,17 @@ static const struct stacktrace_ops print_trace_ops = { .address = print_trace_address, }; -void -show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, - unsigned long bp) +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) { printk("\nCall Trace:\n"); - dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL); + dump_trace(task, regs, stack, bp, &print_trace_ops, NULL); printk("\n"); } static void -_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, - unsigned long bp) +_show_stack(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp) { unsigned long *stack; int i; @@ -404,14 +400,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, // back trace for this cpu. if (sp == NULL) { - if (tsk) - sp = (unsigned long *)tsk->thread.sp; + if (task) + sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; } stack = sp; - for(i=0; i < kstack_depth_to_print; i++) { + for (i = 0; i < kstack_depth_to_print; i++) { if (stack >= irqstack && stack <= irqstack_end) { if (stack == irqstack_end) { stack = (unsigned long *) (irqstack_end[-1]); @@ -426,12 +422,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, printk(" %016lx", *stack++); touch_nmi_watchdog(); } - show_trace(tsk, regs, sp, bp); + show_trace(task, regs, sp, bp); } -void show_stack(struct task_struct *tsk, unsigned long * sp) +void show_stack(struct task_struct *task, unsigned long *sp) { - _show_stack(tsk, NULL, sp, 0); + _show_stack(task, NULL, sp, 0); } /* @@ -439,8 +435,8 @@ void show_stack(struct task_struct *tsk, unsigned long * sp) */ void dump_stack(void) { - unsigned long dummy; unsigned long bp = 0; + unsigned long stack; #ifdef CONFIG_FRAME_POINTER if (!bp) @@ -452,7 +448,7 @@ void dump_stack(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - show_trace(NULL, NULL, &dummy, bp); + show_trace(NULL, NULL, &stack, bp); } EXPORT_SYMBOL(dump_stack); @@ -463,12 +459,8 @@ void show_registers(struct pt_regs *regs) unsigned long sp; const int cpu = smp_processor_id(); struct task_struct *cur = cpu_pda(cpu)->pcurrent; - u8 *ip; - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; sp = regs->sp; - ip = (u8 *) regs->ip - code_prologue; printk("CPU %d ", cpu); __show_regs(regs); printk("Process %s (pid: %d, threadinfo %p, task %p)\n", @@ -479,15 +471,21 @@ void show_registers(struct pt_regs *regs) * time of the fault.. */ if (!user_mode(regs)) { + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; unsigned char c; + u8 *ip; + printk("Stack: "); _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); printk("\n"); printk(KERN_EMERG "Code: "); + + ip = (u8 *)regs->ip - code_prologue; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { /* try starting at RIP */ - ip = (u8 *) regs->ip; + ip = (u8 *)regs->ip; code_len = code_len - code_prologue + 1; } for (i = 0; i < code_len; i++, ip++) { @@ -503,7 +501,7 @@ void show_registers(struct pt_regs *regs) } } printk("\n"); -} +} int is_valid_bugaddr(unsigned long ip) { @@ -561,10 +559,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) do_exit(signr); } -int __kprobes __die(const char * str, struct pt_regs * regs, long err) +int __kprobes __die(const char *str, struct pt_regs *regs, long err) { - static int die_counter; - printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); + printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); #endif @@ -575,8 +572,10 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) return 1; + show_registers(regs); add_taint(TAINT_DIE); /* Executive summary in case the oops scrolled away */ @@ -588,7 +587,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err) return 0; } -void die(const char * str, struct pt_regs * regs, long err) +void die(const char *str, struct pt_regs *regs, long err) { unsigned long flags = oops_begin(); @@ -605,8 +604,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) { unsigned long flags; - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == - NOTIFY_STOP) + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; flags = oops_begin(); @@ -614,7 +612,9 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) * We are in trouble anyway, lets at least try * to get a message out. */ - printk(str, smp_processor_id()); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); show_registers(regs); if (kexec_should_crash(current)) crash_kexec(regs); @@ -626,44 +626,44 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) do_exit(SIGBUS); } -static void __kprobes do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, - siginfo_t *info) +static void __kprobes +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, siginfo_t *info) { struct task_struct *tsk = current; - if (user_mode(regs)) { - /* - * We want error_code and trap_no set for userspace - * faults and kernelspace faults which result in - * die(), but not kernelspace faults which are fixed - * up. die() gives the process no chance to handle - * the signal and notice the kernel fault information, - * so that won't result in polluting the information - * about previously queued, but not yet delivered, - * faults. See also do_general_protection below. - */ - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - - if (show_unhandled_signals && unhandled_signal(tsk, signr) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } + if (!user_mode(regs)) + goto kernel_trap; - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; + /* + * We want error_code and trap_no set for userspace faults and + * kernelspace faults which result in die(), but not + * kernelspace faults which are fixed up. die() gives the + * process no chance to handle the signal and notice the + * kernel fault information, so that won't result in polluting + * the information about previously queued, but not yet + * delivered, faults. See also do_general_protection below. + */ + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + + if (show_unhandled_signals && unhandled_signal(tsk, signr) && + printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] trap %s ip:%lx sp:%lx error:%lx", + tsk->comm, tsk->pid, str, + regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); } + if (info) + force_sig_info(signr, info, tsk); + else + force_sig(signr, tsk); + return; +kernel_trap: if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; @@ -673,41 +673,39 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, } #define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ + do_trap(trapnr, signr, str, regs, error_code, NULL); \ } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - trace_hardirqs_fixup(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ + do_trap(trapnr, signr, str, regs, error_code, &info); \ } -DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) -DO_ERROR( 4, SIGSEGV, "overflow", overflow) -DO_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) -DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +DO_ERROR(4, SIGSEGV, "overflow", overflow) +DO_ERROR(5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR(18, SIGSEGV, "reserved", reserved) /* Runs on IST stack */ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) @@ -737,31 +735,34 @@ asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) die(str, regs, error_code); } -asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, - long error_code) +asmlinkage void __kprobes +do_general_protection(struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; + struct task_struct *tsk; conditional_sti(regs); - if (user_mode(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } + tsk = current; + if (!user_mode(regs)) + goto gp_in_kernel; - force_sig(SIGSEGV, tsk); - return; - } + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] general protection ip:%lx sp:%lx error:%lx", + tsk->comm, tsk->pid, + regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } + + force_sig(SIGSEGV, tsk); + return; +gp_in_kernel: if (fixup_exception(regs)) return; @@ -774,14 +775,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, } static notrace __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs * regs) +mem_parity_error(unsigned char reason, struct pt_regs *regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); #if defined(CONFIG_EDAC) - if(edac_handler_set()) { + if (edac_handler_set()) { edac_atomic_assert_error(); return; } @@ -798,7 +799,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) } static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs * regs) +io_check_error(unsigned char reason, struct pt_regs *regs) { printk("NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); @@ -828,14 +829,14 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) /* Runs on IST stack. This code must keep interrupts off all the time. Nested NMIs are prevented by the CPU. */ -asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int cpu; cpu = smp_processor_id(); - /* Only the BSP gets external NMIs from the system. */ + /* Only the BSP gets external NMIs from the system. */ if (!cpu) reason = get_nmi_reason(); @@ -847,32 +848,57 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) * Ok, so this is none of the documented NMI sources, * so it must be the NMI watchdog. */ - if (nmi_watchdog_tick(regs,reason)) + if (nmi_watchdog_tick(regs, reason)) return; - if (!do_nmi_callback(regs,cpu)) + if (!do_nmi_callback(regs, cpu)) unknown_nmi_error(reason, regs); return; } if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) - return; + return; /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & 0x80) mem_parity_error(reason, regs); if (reason & 0x40) io_check_error(reason, regs); } +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + + add_pda(__nmi_count, 1); + + if (!ignore_nmis) + default_do_nmi(regs); + + nmi_exit(); +} + +void stop_nmi(void) +{ + acpi_nmi_disable(); + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; + acpi_nmi_enable(); +} + /* runs on IST stack. */ -asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) +asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { trace_hardirqs_fixup(); - if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) return; - } + preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); @@ -903,8 +929,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) asmlinkage void __kprobes do_debug(struct pt_regs * regs, unsigned long error_code) { - unsigned long condition; struct task_struct *tsk = current; + unsigned long condition; siginfo_t info; trace_hardirqs_fixup(); @@ -925,21 +951,19 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) { + if (!tsk->thread.debugreg7) goto clear_dr7; - } } tsk->thread.debugreg6 = condition; - /* * Single-stepping through TF: make sure we ignore any events in * kernel space (but re-enable TF when returning to user mode). */ if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if (!user_mode(regs)) + goto clear_TF_reenable; } /* Ok, finally something we can handle */ @@ -952,7 +976,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, force_sig_info(SIGTRAP, &info, tsk); clear_dr7: - set_debugreg(0UL, 7); + set_debugreg(0, 7); preempt_conditional_cli(regs); return; @@ -960,6 +984,7 @@ clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; preempt_conditional_cli(regs); + return; } static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) @@ -982,7 +1007,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) asmlinkage void do_coprocessor_error(struct pt_regs *regs) { void __user *ip = (void __user *)(regs->ip); - struct task_struct * task; + struct task_struct *task; siginfo_t info; unsigned short cwd, swd; @@ -1015,30 +1040,30 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); switch (swd & ~cwd & 0x3f) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; + case 0x000: /* No unmasked exception */ + default: /* Multiple exceptions */ + break; + case 0x001: /* Invalid Op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; } force_sig_info(SIGFPE, &info, task); } @@ -1051,7 +1076,7 @@ asmlinkage void bad_intr(void) asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) { void __user *ip = (void __user *)(regs->ip); - struct task_struct * task; + struct task_struct *task; siginfo_t info; unsigned short mxcsr; @@ -1079,25 +1104,25 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) */ mxcsr = get_fpu_mxcsr(task); switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; } force_sig_info(SIGFPE, &info, task); } @@ -1115,7 +1140,7 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) } /* - * 'math_state_restore()' saves the current math information in the + * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task * * Careful.. There are problems with IBM-designed IRQ13 behaviour. @@ -1140,7 +1165,7 @@ asmlinkage void math_state_restore(void) local_irq_disable(); } - clts(); /* Allow maths ops (or we recurse) */ + clts(); /* Allow maths ops (or we recurse) */ restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; @@ -1149,64 +1174,61 @@ EXPORT_SYMBOL_GPL(math_state_restore); void __init trap_init(void) { - set_intr_gate(0,÷_error); - set_intr_gate_ist(1,&debug,DEBUG_STACK); - set_intr_gate_ist(2,&nmi,NMI_STACK); - set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ - set_system_gate(4,&overflow); /* int4 can be called from all */ - set_intr_gate(5,&bounds); - set_intr_gate(6,&invalid_op); - set_intr_gate(7,&device_not_available); - set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); - set_intr_gate(9,&coprocessor_segment_overrun); - set_intr_gate(10,&invalid_TSS); - set_intr_gate(11,&segment_not_present); - set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK); - set_intr_gate(13,&general_protection); - set_intr_gate(14,&page_fault); - set_intr_gate(15,&spurious_interrupt_bug); - set_intr_gate(16,&coprocessor_error); - set_intr_gate(17,&alignment_check); + set_intr_gate(0, ÷_error); + set_intr_gate_ist(1, &debug, DEBUG_STACK); + set_intr_gate_ist(2, &nmi, NMI_STACK); + set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ + set_system_gate(4, &overflow); /* int4 can be called from all */ + set_intr_gate(5, &bounds); + set_intr_gate(6, &invalid_op); + set_intr_gate(7, &device_not_available); + set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); + set_intr_gate(9, &coprocessor_segment_overrun); + set_intr_gate(10, &invalid_TSS); + set_intr_gate(11, &segment_not_present); + set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); + set_intr_gate(13, &general_protection); + set_intr_gate(14, &page_fault); + set_intr_gate(15, &spurious_interrupt_bug); + set_intr_gate(16, &coprocessor_error); + set_intr_gate(17, &alignment_check); #ifdef CONFIG_X86_MCE - set_intr_gate_ist(18,&machine_check, MCE_STACK); + set_intr_gate_ist(18, &machine_check, MCE_STACK); #endif - set_intr_gate(19,&simd_coprocessor_error); + set_intr_gate(19, &simd_coprocessor_error); #ifdef CONFIG_IA32_EMULATION set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif - /* * initialize the per thread extended state: */ - init_thread_xstate(); + init_thread_xstate(); /* - * Should be a barrier for any external CPU state. + * Should be a barrier for any external CPU state: */ cpu_init(); } - static int __init oops_setup(char *s) -{ +{ if (!s) return -EINVAL; if (!strcmp(s, "panic")) panic_on_oops = 1; return 0; -} +} early_param("oops", oops_setup); static int __init kstack_setup(char *s) { if (!s) return -EINVAL; - kstack_depth_to_print = simple_strtoul(s,NULL,0); + kstack_depth_to_print = simple_strtoul(s, NULL, 0); return 0; } early_param("kstack", kstack_setup); - static int __init code_bytes_setup(char *s) { code_bytes = simple_strtoul(s, NULL, 0); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c new file mode 100644 index 000000000000..4a775d001957 --- /dev/null +++ b/arch/x86/kernel/tsc.c @@ -0,0 +1,534 @@ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/acpi_pmtmr.h> +#include <linux/cpufreq.h> +#include <linux/dmi.h> +#include <linux/delay.h> +#include <linux/clocksource.h> +#include <linux/percpu.h> + +#include <asm/hpet.h> +#include <asm/timer.h> +#include <asm/vgtod.h> +#include <asm/time.h> +#include <asm/delay.h> + +unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +EXPORT_SYMBOL(cpu_khz); +unsigned int tsc_khz; +EXPORT_SYMBOL(tsc_khz); + +/* + * TSC can be unstable due to cpufreq or due to unsynced TSCs + */ +static int tsc_unstable; + +/* native_sched_clock() is called before tsc_init(), so + we must start with the TSC soft disabled to prevent + erroneous rdtsc usage on !cpu_has_tsc processors */ +static int tsc_disabled = -1; + +/* + * Scheduler clock - returns current time in nanosec units. + */ +u64 native_sched_clock(void) +{ + u64 this_offset; + + /* + * Fall back to jiffies if there's no TSC available: + * ( But note that we still use it if the TSC is marked + * unstable. We do this because unlike Time Of Day, + * the scheduler clock tolerates small errors and it's + * very important for it to be as fast as the platform + * can achive it. ) + */ + if (unlikely(tsc_disabled)) { + /* No locking but a rare wrong value is not a big deal: */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); + } + + /* read the Time Stamp Counter: */ + rdtscll(this_offset); + + /* return the value in ns */ + return cycles_2_ns(this_offset); +} + +/* We need to define a real function for sched_clock, to override the + weak default version */ +#ifdef CONFIG_PARAVIRT +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#else +unsigned long long +sched_clock(void) __attribute__((alias("native_sched_clock"))); +#endif + +int check_tsc_unstable(void) +{ + return tsc_unstable; +} +EXPORT_SYMBOL_GPL(check_tsc_unstable); + +#ifdef CONFIG_X86_TSC +int __init notsc_setup(char *str) +{ + printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " + "cannot disable TSC completely.\n"); + tsc_disabled = 1; + return 1; +} +#else +/* + * disable flag for tsc. Takes effect by clearing the TSC cpu flag + * in cpu/common.c + */ +int __init notsc_setup(char *str) +{ + setup_clear_cpu_cap(X86_FEATURE_TSC); + return 1; +} +#endif + +__setup("notsc", notsc_setup); + +#define MAX_RETRIES 5 +#define SMI_TRESHOLD 50000 + +/* + * Read TSC and the reference counters. Take care of SMI disturbance + */ +static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +{ + u64 t1, t2; + int i; + + for (i = 0; i < MAX_RETRIES; i++) { + t1 = get_cycles(); + if (hpet) + *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + else + *pm = acpi_pm_read_early(); + t2 = get_cycles(); + if ((t2 - t1) < SMI_TRESHOLD) + return t2; + } + return ULLONG_MAX; +} + +/** + * native_calibrate_tsc - calibrate the tsc on boot + */ +unsigned long native_calibrate_tsc(void) +{ + unsigned long flags; + u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; + int hpet = is_hpet_enabled(); + unsigned int tsc_khz_val = 0; + + local_irq_save(flags); + + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + tr1 = get_cycles(); + while ((inb(0x61) & 0x20) == 0); + tr2 = get_cycles(); + + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Preset the result with the raw and inaccurate PIT + * calibration value + */ + delta = (tr2 - tr1); + do_div(delta, 50); + tsc_khz_val = delta; + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) { + printk(KERN_INFO "TSC calibrated against PIT\n"); + goto out; + } + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { + printk(KERN_WARNING "TSC calibration disturbed by SMI, " + "using PIT calibration result\n"); + goto out; + } + + tsc2 = (tsc2 - tsc1) * 1000000LL; + + if (hpet) { + printk(KERN_INFO "TSC calibrated against HPET\n"); + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tsc1, 1000000); + } else { + printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = pm2 * 1000000000LL; + do_div(tsc1, PMTMR_TICKS_PER_SEC); + } + + do_div(tsc2, tsc1); + tsc_khz_val = tsc2; + +out: + return tsc_khz_val; +} + + +#ifdef CONFIG_X86_32 +/* Only called from the Powernow K7 cpu freq driver */ +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + tsc_khz = calibrate_tsc(); + cpu_khz = tsc_khz; + cpu_data(0).loops_per_jiffy = + cpufreq_scale(cpu_data(0).loops_per_jiffy, + cpu_khz_old, cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} + +EXPORT_SYMBOL(recalibrate_cpu_khz); + +#endif /* CONFIG_X86_32 */ + +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by [email protected]) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * ([email protected]) + * + * [email protected] "math is hard, lets go shopping!" + */ + +DEFINE_PER_CPU(unsigned long, cyc2ns); + +static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +{ + unsigned long long tsc_now, ns_now; + unsigned long flags, *scale; + + local_irq_save(flags); + sched_clock_idle_sleep_event(); + + scale = &per_cpu(cyc2ns, cpu); + + rdtscll(tsc_now); + ns_now = __cycles_2_ns(tsc_now); + + if (cpu_khz) + *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + + sched_clock_idle_wakeup_event(0); + local_irq_restore(flags); +} + +#ifdef CONFIG_CPU_FREQ + +/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency + * changes. + * + * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's + * not that important because current Opteron setups do not support + * scaling on SMP anyroads. + * + * Should fix up last_tsc too. Currently gettimeofday in the + * first tick after the change will be slightly wrong. + */ + +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long tsc_khz_ref; + +static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + unsigned long *lpj, dummy; + + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) + return 0; + + lpj = &dummy; + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) +#ifdef CONFIG_SMP + lpj = &cpu_data(freq->cpu).loops_per_jiffy; +#else + lpj = &boot_cpu_data.loops_per_jiffy; +#endif + + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = *lpj; + tsc_khz_ref = tsc_khz; + } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + + tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + mark_tsc_unstable("cpufreq changes"); + } + + set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + .notifier_call = time_cpufreq_notifier +}; + +static int __init cpufreq_tsc(void) +{ + cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} + +core_initcall(cpufreq_tsc); + +#endif /* CONFIG_CPU_FREQ */ + +/* clocksource code */ + +static struct clocksource clocksource_tsc; + +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp. This can be observed in a + * very small window right after one CPU updated cycle_last under + * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which + * is smaller than the cycle_last reference value due to a TSC which + * is slighty behind. This delta is nowhere else observable, but in + * that case it results in a forward time jump in the range of hours + * due to the unsigned delta calculation of the time keeping core + * code, which is necessary to support wrapping clocksources like pm + * timer. + */ +static cycle_t read_tsc(void) +{ + cycle_t ret = (cycle_t)get_cycles(); + + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; +} + +static cycle_t __vsyscall_fn vread_tsc(void) +{ + cycle_t ret = (cycle_t)vget_cycles(); + + return ret >= __vsyscall_gtod_data.clock.cycle_last ? + ret : __vsyscall_gtod_data.clock.cycle_last; +} + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .mask = CLOCKSOURCE_MASK(64), + .shift = 22, + .flags = CLOCK_SOURCE_IS_CONTINUOUS | + CLOCK_SOURCE_MUST_VERIFY, +#ifdef CONFIG_X86_64 + .vread = vread_tsc, +#endif +}; + +void mark_tsc_unstable(char *reason) +{ + if (!tsc_unstable) { + tsc_unstable = 1; + printk("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) + clocksource_change_rating(&clocksource_tsc, 0); + else + clocksource_tsc.rating = 0; + } +} + +EXPORT_SYMBOL_GPL(mark_tsc_unstable); + +static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", + d->ident); + tsc_unstable = 1; + return 0; +} + +/* List of systems that have known TSC problems */ +static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { + { + .callback = dmi_mark_tsc_unstable, + .ident = "IBM Thinkpad 380XD", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), + }, + }, + {} +}; + +/* + * Geode_LX - the OLPC CPU has a possibly a very reliable TSC + */ +#ifdef CONFIG_MGEODE_LX +/* RTSC counts during suspend */ +#define RTSC_SUSP 0x100 + +static void __init check_geode_tsc_reliable(void) +{ + unsigned long res_low, res_high; + + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + if (res_low & RTSC_SUSP) + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; +} +#else +static inline void check_geode_tsc_reliable(void) { } +#endif + +/* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. + */ +__cpuinit int unsynchronized_tsc(void) +{ + if (!cpu_has_tsc || tsc_unstable) + return 1; + +#ifdef CONFIG_SMP + if (apic_is_clustered_box()) + return 1; +#endif + + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; + /* + * Intel systems are normally all synchronized. + * Exceptions must mark TSC as unstable: + */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + /* assume multi socket systems are not synchronized: */ + if (num_possible_cpus() > 1) + tsc_unstable = 1; + } + + return tsc_unstable; +} + +static void __init init_tsc_clocksource(void) +{ + clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, + clocksource_tsc.shift); + /* lower the rating if we already know its unstable: */ + if (check_tsc_unstable()) { + clocksource_tsc.rating = 0; + clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; + } + clocksource_register(&clocksource_tsc); +} + +void __init tsc_init(void) +{ + u64 lpj; + int cpu; + + if (!cpu_has_tsc) + return; + + tsc_khz = calibrate_tsc(); + cpu_khz = tsc_khz; + + if (!tsc_khz) { + mark_tsc_unstable("could not calculate TSC khz"); + return; + } + +#ifdef CONFIG_X86_64 + if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) + cpu_khz = calibrate_cpu(); +#endif + + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + /* + * Secondary CPUs do not run through tsc_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. (cpufreq notifiers will fix this + * up if their speed diverges) + */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(cpu_khz, cpu); + use_tsc_delay(); + + if (tsc_disabled > 0) + return; + + /* now allow native_sched_clock() to use rdtsc */ + tsc_disabled = 0; + + use_tsc_delay(); + /* Check and install the TSC clocksource */ + dmi_check_system(bad_tsc_dmi_table); + + if (unsynchronized_tsc()) + mark_tsc_unstable("TSCs unsynchronized"); + + check_geode_tsc_reliable(); + init_tsc_clocksource(); +} + diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c deleted file mode 100644 index 65b70637ad97..000000000000 --- a/arch/x86/kernel/tsc_32.c +++ /dev/null @@ -1,451 +0,0 @@ -#include <linux/sched.h> -#include <linux/clocksource.h> -#include <linux/workqueue.h> -#include <linux/cpufreq.h> -#include <linux/jiffies.h> -#include <linux/init.h> -#include <linux/dmi.h> -#include <linux/percpu.h> - -#include <asm/delay.h> -#include <asm/tsc.h> -#include <asm/io.h> -#include <asm/timer.h> - -#include "mach_timer.h" - -/* native_sched_clock() is called before tsc_init(), so - we must start with the TSC soft disabled to prevent - erroneous rdtsc usage on !cpu_has_tsc processors */ -static int tsc_disabled = -1; - -/* - * On some systems the TSC frequency does not - * change with the cpu frequency. So we need - * an extra value to store the TSC freq - */ -unsigned int tsc_khz; -EXPORT_SYMBOL_GPL(tsc_khz); - -#ifdef CONFIG_X86_TSC -static int __init tsc_setup(char *str) -{ - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC completely.\n"); - tsc_disabled = 1; - return 1; -} -#else -/* - * disable flag for tsc. Takes effect by clearing the TSC cpu flag - * in cpu/common.c - */ -static int __init tsc_setup(char *str) -{ - setup_clear_cpu_cap(X86_FEATURE_TSC); - return 1; -} -#endif - -__setup("notsc", tsc_setup); - -/* - * code to mark and check if the TSC is unstable - * due to cpufreq or due to unsynced TSCs - */ -static int tsc_unstable; - -int check_tsc_unstable(void) -{ - return tsc_unstable; -} -EXPORT_SYMBOL_GPL(check_tsc_unstable); - -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by [email protected]) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * ([email protected]) - * - * [email protected] "math is hard, lets go shopping!" - */ - -DEFINE_PER_CPU(unsigned long, cyc2ns); - -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) -{ - unsigned long long tsc_now, ns_now; - unsigned long flags, *scale; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - scale = &per_cpu(cyc2ns, cpu); - - rdtscll(tsc_now); - ns_now = __cycles_2_ns(tsc_now); - - if (cpu_khz) - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - - /* - * Start smoothly with the new frequency: - */ - sched_clock_idle_wakeup_event(0); - local_irq_restore(flags); -} - -/* - * Scheduler clock - returns current time in nanosec units. - */ -unsigned long long native_sched_clock(void) -{ - unsigned long long this_offset; - - /* - * Fall back to jiffies if there's no TSC available: - * ( But note that we still use it if the TSC is marked - * unstable. We do this because unlike Time Of Day, - * the scheduler clock tolerates small errors and it's - * very important for it to be as fast as the platform - * can achive it. ) - */ - if (unlikely(tsc_disabled)) - /* No locking but a rare wrong value is not a big deal: */ - return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); - - /* read the Time Stamp Counter: */ - rdtscll(this_offset); - - /* return the value in ns */ - return cycles_2_ns(this_offset); -} - -/* We need to define a real function for sched_clock, to override the - weak default version */ -#ifdef CONFIG_PARAVIRT -unsigned long long sched_clock(void) -{ - return paravirt_sched_clock(); -} -#else -unsigned long long sched_clock(void) - __attribute__((alias("native_sched_clock"))); -#endif - -unsigned long native_calculate_cpu_khz(void) -{ - unsigned long long start, end; - unsigned long count; - u64 delta64 = (u64)ULLONG_MAX; - int i; - unsigned long flags; - - local_irq_save(flags); - - /* run 3 times to ensure the cache is warm and to get an accurate reading */ - for (i = 0; i < 3; i++) { - mach_prepare_counter(); - rdtscll(start); - mach_countup(&count); - rdtscll(end); - - /* - * Error: ECTCNEVERSET - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ - if (count <= 1) - continue; - - /* cpu freq too slow: */ - if ((end - start) <= CALIBRATE_TIME_MSEC) - continue; - - /* - * We want the minimum time of all runs in case one of them - * is inaccurate due to SMI or other delay - */ - delta64 = min(delta64, (end - start)); - } - - /* cpu freq too fast (or every run was bad): */ - if (delta64 > (1ULL<<32)) - goto err; - - delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ - do_div(delta64,CALIBRATE_TIME_MSEC); - - local_irq_restore(flags); - return (unsigned long)delta64; -err: - local_irq_restore(flags); - return 0; -} - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - cpu_data(0).loops_per_jiffy = - cpufreq_scale(cpu_data(0).loops_per_jiffy, - cpu_khz_old, cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} - -EXPORT_SYMBOL(recalibrate_cpu_khz); - -#ifdef CONFIG_CPU_FREQ - -/* - * if the CPU frequency is scaled, TSC-based delays will need a different - * loops_per_jiffy value to function properly. - */ -static unsigned int ref_freq; -static unsigned long loops_per_jiffy_ref; -static unsigned long cpu_khz_ref; - -static int -time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) -{ - struct cpufreq_freqs *freq = data; - - if (!ref_freq) { - if (!freq->old){ - ref_freq = freq->new; - return 0; - } - ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy; - cpu_khz_ref = cpu_khz; - } - - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data(freq->cpu).loops_per_jiffy = - cpufreq_scale(loops_per_jiffy_ref, - ref_freq, freq->new); - - if (cpu_khz) { - - if (num_online_cpus() == 1) - cpu_khz = cpufreq_scale(cpu_khz_ref, - ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { - tsc_khz = cpu_khz; - set_cyc2ns_scale(cpu_khz, freq->cpu); - /* - * TSC based sched_clock turns - * to junk w/ cpufreq - */ - mark_tsc_unstable("cpufreq changes"); - } - } - } - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - return cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); -} -core_initcall(cpufreq_tsc); - -#endif - -/* clock source code */ - -static unsigned long current_tsc_khz; -static struct clocksource clocksource_tsc; - -/* - * We compare the TSC to the cycle_last value in the clocksource - * structure to avoid a nasty time-warp issue. This can be observed in - * a very small window right after one CPU updated cycle_last under - * xtime lock and the other CPU reads a TSC value which is smaller - * than the cycle_last reference value due to a TSC which is slighty - * behind. This delta is nowhere else observable, but in that case it - * results in a forward time jump in the range of hours due to the - * unsigned delta calculation of the time keeping core code, which is - * necessary to support wrapping clocksources like pm timer. - */ -static cycle_t read_tsc(void) -{ - cycle_t ret; - - rdtscll(ret); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static struct clocksource clocksource_tsc = { - .name = "tsc", - .rating = 300, - .read = read_tsc, - .mask = CLOCKSOURCE_MASK(64), - .mult = 0, /* to be set */ - .shift = 22, - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, -}; - -void mark_tsc_unstable(char *reason) -{ - if (!tsc_unstable) { - tsc_unstable = 1; - printk("Marking TSC unstable due to: %s.\n", reason); - /* Can be called before registration */ - if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else - clocksource_tsc.rating = 0; - } -} -EXPORT_SYMBOL_GPL(mark_tsc_unstable); - -static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) -{ - printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", - d->ident); - tsc_unstable = 1; - return 0; -} - -/* List of systems that have known TSC problems */ -static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { - { - .callback = dmi_mark_tsc_unstable, - .ident = "IBM Thinkpad 380XD", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), - }, - }, - {} -}; - -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. - */ -__cpuinit int unsynchronized_tsc(void) -{ - if (!cpu_has_tsc || tsc_unstable) - return 1; - - /* Anything with constant TSC should be synchronized */ - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - - /* - * Intel systems are normally all synchronized. - * Exceptions must mark TSC as unstable: - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - /* assume multi socket systems are not synchronized: */ - if (num_possible_cpus() > 1) - tsc_unstable = 1; - } - return tsc_unstable; -} - -/* - * Geode_LX - the OLPC CPU has a possibly a very reliable TSC - */ -#ifdef CONFIG_MGEODE_LX -/* RTSC counts during suspend */ -#define RTSC_SUSP 0x100 - -static void __init check_geode_tsc_reliable(void) -{ - unsigned long res_low, res_high; - - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - if (res_low & RTSC_SUSP) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; -} -#else -static inline void check_geode_tsc_reliable(void) { } -#endif - - -void __init tsc_init(void) -{ - int cpu; - - if (!cpu_has_tsc || tsc_disabled > 0) - return; - - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - - if (!cpu_khz) { - mark_tsc_unstable("could not calculate TSC khz"); - return; - } - - /* now allow native_sched_clock() to use rdtsc */ - tsc_disabled = 0; - - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); - - /* - * Secondary CPUs do not run through tsc_init(), so set up - * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) - */ - for_each_possible_cpu(cpu) - set_cyc2ns_scale(cpu_khz, cpu); - - use_tsc_delay(); - - /* Check and install the TSC clocksource */ - dmi_check_system(bad_tsc_dmi_table); - - unsynchronized_tsc(); - check_geode_tsc_reliable(); - current_tsc_khz = tsc_khz; - clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, - clocksource_tsc.shift); - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } - clocksource_register(&clocksource_tsc); -} diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c deleted file mode 100644 index 1784b8077a12..000000000000 --- a/arch/x86/kernel/tsc_64.c +++ /dev/null @@ -1,357 +0,0 @@ -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/clocksource.h> -#include <linux/time.h> -#include <linux/acpi.h> -#include <linux/cpufreq.h> -#include <linux/acpi_pmtmr.h> - -#include <asm/hpet.h> -#include <asm/timex.h> -#include <asm/timer.h> -#include <asm/vgtod.h> - -static int notsc __initdata = 0; - -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ -EXPORT_SYMBOL(cpu_khz); -unsigned int tsc_khz; -EXPORT_SYMBOL(tsc_khz); - -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by [email protected]) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * ([email protected]) - * - * [email protected] "math is hard, lets go shopping!" - */ -DEFINE_PER_CPU(unsigned long, cyc2ns); - -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) -{ - unsigned long long tsc_now, ns_now; - unsigned long flags, *scale; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - scale = &per_cpu(cyc2ns, cpu); - - rdtscll(tsc_now); - ns_now = __cycles_2_ns(tsc_now); - - if (cpu_khz) - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - - sched_clock_idle_wakeup_event(0); - local_irq_restore(flags); -} - -unsigned long long native_sched_clock(void) -{ - unsigned long a = 0; - - /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, - * which means it is not completely exact and may not be monotonous - * between CPUs. But the errors should be too small to matter for - * scheduling purposes. - */ - - rdtscll(a); - return cycles_2_ns(a); -} - -/* We need to define a real function for sched_clock, to override the - weak default version */ -#ifdef CONFIG_PARAVIRT -unsigned long long sched_clock(void) -{ - return paravirt_sched_clock(); -} -#else -unsigned long long -sched_clock(void) __attribute__((alias("native_sched_clock"))); -#endif - - -static int tsc_unstable; - -int check_tsc_unstable(void) -{ - return tsc_unstable; -} -EXPORT_SYMBOL_GPL(check_tsc_unstable); - -#ifdef CONFIG_CPU_FREQ - -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency - * changes. - * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. - * - * Should fix up last_tsc too. Currently gettimeofday in the - * first tick after the change will be slightly wrong. - */ - -static unsigned int ref_freq; -static unsigned long loops_per_jiffy_ref; -static unsigned long tsc_khz_ref; - -static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - unsigned long *lpj, dummy; - - if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - - lpj = &dummy; - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) -#ifdef CONFIG_SMP - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#else - lpj = &boot_cpu_data.loops_per_jiffy; -#endif - - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; - tsc_khz_ref = tsc_khz; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - *lpj = - cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); - - tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - mark_tsc_unstable("cpufreq changes"); - } - - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - return 0; -} - -core_initcall(cpufreq_tsc); - -#endif - -#define MAX_RETRIES 5 -#define SMI_TRESHOLD 50000 - -/* - * Read TSC and the reference counters. Take care of SMI disturbance - */ -static unsigned long __init tsc_read_refs(unsigned long *pm, - unsigned long *hpet) -{ - unsigned long t1, t2; - int i; - - for (i = 0; i < MAX_RETRIES; i++) { - t1 = get_cycles(); - if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; - else - *pm = acpi_pm_read_early(); - t2 = get_cycles(); - if ((t2 - t1) < SMI_TRESHOLD) - return t2; - } - return ULONG_MAX; -} - -/** - * tsc_calibrate - calibrate the tsc on boot - */ -void __init tsc_calibrate(void) -{ - unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(), cpu; - - local_irq_save(flags); - - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - tr1 = get_cycles(); - while ((inb(0x61) & 0x20) == 0); - tr2 = get_cycles(); - - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); - - local_irq_restore(flags); - - /* - * Preset the result with the raw and inaccurate PIT - * calibration value - */ - tsc_khz = (tr2 - tr1) / 50; - - /* hpet or pmtimer available ? */ - if (!hpet && !pm1 && !pm2) { - printk(KERN_INFO "TSC calibrated against PIT\n"); - goto out; - } - - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { - printk(KERN_WARNING "TSC calibration disturbed by SMI, " - "using PIT calibration result\n"); - goto out; - } - - tsc2 = (tsc2 - tsc1) * 1000000L; - - if (hpet) { - printk(KERN_INFO "TSC calibrated against HPET\n"); - if (hpet2 < hpet1) - hpet2 += 0x100000000; - hpet2 -= hpet1; - tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; - } else { - printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); - if (pm2 < pm1) - pm2 += ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC; - } - - tsc_khz = tsc2 / tsc1; - -out: - for_each_possible_cpu(cpu) - set_cyc2ns_scale(tsc_khz, cpu); -} - -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. - */ -__cpuinit int unsynchronized_tsc(void) -{ - if (tsc_unstable) - return 1; - -#ifdef CONFIG_SMP - if (apic_is_clustered_box()) - return 1; -#endif - - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - - /* Assume multi socket systems are not synchronized */ - return num_present_cpus() > 1; -} - -int __init notsc_setup(char *s) -{ - notsc = 1; - return 1; -} - -__setup("notsc", notsc_setup); - -static struct clocksource clocksource_tsc; - -/* - * We compare the TSC to the cycle_last value in the clocksource - * structure to avoid a nasty time-warp. This can be observed in a - * very small window right after one CPU updated cycle_last under - * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which - * is smaller than the cycle_last reference value due to a TSC which - * is slighty behind. This delta is nowhere else observable, but in - * that case it results in a forward time jump in the range of hours - * due to the unsigned delta calculation of the time keeping core - * code, which is necessary to support wrapping clocksources like pm - * timer. - */ -static cycle_t read_tsc(void) -{ - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static cycle_t __vsyscall_fn vread_tsc(void) -{ - cycle_t ret = (cycle_t)vget_cycles(); - - return ret >= __vsyscall_gtod_data.clock.cycle_last ? - ret : __vsyscall_gtod_data.clock.cycle_last; -} - -static struct clocksource clocksource_tsc = { - .name = "tsc", - .rating = 300, - .read = read_tsc, - .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, - .vread = vread_tsc, -}; - -void mark_tsc_unstable(char *reason) -{ - if (!tsc_unstable) { - tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else - clocksource_tsc.rating = 0; - } -} -EXPORT_SYMBOL_GPL(mark_tsc_unstable); - -void __init init_tsc_clocksource(void) -{ - if (!notsc) { - clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, - clocksource_tsc.shift); - if (check_tsc_unstable()) - clocksource_tsc.rating = 0; - - clocksource_register(&clocksource_tsc); - } -} diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 956f38927aa7..b15346092b7b 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, insns, ip); case PARAVIRT_PATCH(pv_cpu_ops.iret): return patch_internal(VMI_CALL_IRET, len, insns, ip); - case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret): + case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip); default: break; @@ -896,7 +896,7 @@ static inline int __init activate_vmi(void) * the backend. They are performance critical anyway, so requiring * a patch is not a big problem. */ - pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0; + pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; pv_cpu_ops.iret = (void *)0xbadbab0; #ifdef CONFIG_SMP @@ -932,7 +932,7 @@ static inline int __init activate_vmi(void) pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; #endif pv_time_ops.sched_clock = vmi_sched_clock; - pv_time_ops.get_cpu_khz = vmi_cpu_khz; + pv_time_ops.get_tsc_khz = vmi_tsc_khz; /* We have true wallclock functions; disable CMOS clock sync */ no_sync_cmos_clock = 1; diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index a2b030780aa9..6953859fe289 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -33,8 +33,7 @@ #include <asm/apic.h> #include <asm/timer.h> #include <asm/i8253.h> - -#include <irq_vectors.h> +#include <asm/irq_vectors.h> #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) @@ -70,8 +69,8 @@ unsigned long long vmi_sched_clock(void) return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); } -/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ -unsigned long vmi_cpu_khz(void) +/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */ +unsigned long vmi_tsc_khz(void) { unsigned long long khz; khz = vmi_timer_ops.get_cycle_frequency(); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e9..2674f5796275 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -60,13 +60,6 @@ SECTIONS BUG_TABLE :text - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - RODATA /* writeable */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fad3674b06a5..fd246e22fe6b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -53,13 +53,6 @@ SECTIONS RODATA - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { @@ -177,6 +170,7 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; + . = ALIGN(16); __x86cpuvendor_start = .; .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { *(.x86cpuvendor.init) diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index ba8c0b75ab0a..0c029e8959c7 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -15,9 +15,12 @@ #include <linux/init.h> #include <linux/pci_ids.h> #include <linux/pci_regs.h> + +#include <asm/apic.h> #include <asm/pci-direct.h> #include <asm/io.h> #include <asm/paravirt.h> +#include <asm/setup.h> #if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 61efa2f7d564..c87cbd84c3e5 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -249,7 +249,7 @@ static ctl_table kernel_root_table2[] = { doesn't violate that. We'll find out if it does. */ static void __cpuinit vsyscall_set_cpu(int cpu) { - unsigned long *d; + unsigned long d; unsigned long node = 0; #ifdef CONFIG_NUMA node = cpu_to_node(cpu); @@ -260,11 +260,11 @@ static void __cpuinit vsyscall_set_cpu(int cpu) /* Store cpu number in limit so that it can be loaded quickly in user space in vgetcpu. 12 bits for the CPU and 8 bits for the node. */ - d = (unsigned long *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_PER_CPU); - *d = 0x0f40000000000ULL; - *d |= cpu; - *d |= (node & 0xf) << 12; - *d |= (node >> 4) << 48; + d = 0x0f40000000000ULL; + d |= cpu; + d |= (node & 0xf) << 12; + d |= (node >> 4) << 48; + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); } static void __cpuinit cpu_vsyscall_init(void *arg) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index f6c05d0410fb..2f306a826897 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -53,8 +53,3 @@ EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); EXPORT_SYMBOL(_proxy_pda); - -#ifdef CONFIG_PARAVIRT -/* Virtualized guests may want to use it */ -EXPORT_SYMBOL_GPL(cpu_gdt_descr); -#endif diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5c7e2fd52075..50dad44fb542 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -607,7 +607,7 @@ static unsigned long lguest_get_wallclock(void) * what speed it runs at, or 0 if it's unusable as a reliable clock source. * This matches what we want here: if we return 0 from this function, the x86 * TSC clock will give up and not register itself. */ -static unsigned long lguest_cpu_khz(void) +static unsigned long lguest_tsc_khz(void) { return lguest_data.tsc_khz; } @@ -835,7 +835,7 @@ static __init char *lguest_memory_setup(void) /* The Linux bootloader header contains an "e820" memory map: the * Launcher populated the first entry with our memory limit. */ - add_memory_region(boot_params.e820_map[0].addr, + e820_add_region(boot_params.e820_map[0].addr, boot_params.e820_map[0].size, boot_params.e820_map[0].type); @@ -998,7 +998,7 @@ __init void lguest_init(void) /* time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; - pv_time_ops.get_cpu_khz = lguest_cpu_khz; + pv_time_ops.get_tsc_khz = lguest_tsc_khz; /* Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). */ @@ -1012,6 +1012,7 @@ __init void lguest_init(void) * clobbered. The Launcher places our initial pagetables somewhere at * the top of our physical memory, so we don't need extra space: set * init_pg_tables_end to the end of the kernel. */ + init_pg_tables_start = __pa(pg0); init_pg_tables_end = __pa(pg0); /* Load the %fs segment register (the per-cpu segment register) with @@ -1065,9 +1066,9 @@ __init void lguest_init(void) pm_power_off = lguest_power_off; machine_ops.restart = lguest_restart; - /* Now we're set up, call start_kernel() in init/main.c and we proceed + /* Now we're set up, call i386_start_kernel() in head32.c and we proceed * to boot as normal. It never returns. */ - start_kernel(); + i386_start_kernel(); } /* * This marks the end of stage II of our journey, The Guest. diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 76f60f52a885..83226e0a7ce4 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -4,8 +4,8 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o -lib-y := delay_$(BITS).o -lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o +lib-y := delay.o +lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o ifeq ($(CONFIG_X86_32),y) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index ee1c3f635157..e5afb4ad3f1c 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -1,8 +1,10 @@ -/* Copyright 2002 Andi Kleen, SuSE Labs. +/* + * Copyright 2008 Vitaly Mayatskikh <[email protected]> + * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU Public License v2. - * - * Functions to copy from and to user space. - */ + * + * Functions to copy from and to user space. + */ #include <linux/linkage.h> #include <asm/dwarf2.h> @@ -20,60 +22,88 @@ .long \orig-1f /* by default jump to orig */ 1: .section .altinstr_replacement,"ax" -2: .byte 0xe9 /* near jump with 32bit immediate */ +2: .byte 0xe9 /* near jump with 32bit immediate */ .long \alt-1b /* offset */ /* or alternatively to alt */ .previous .section .altinstructions,"a" .align 8 .quad 0b .quad 2b - .byte \feature /* when feature is set */ + .byte \feature /* when feature is set */ .byte 5 .byte 5 .previous .endm -/* Standard copy_to_user with segment limit checking */ + .macro ALIGN_DESTINATION +#ifdef FIX_ALIGNMENT + /* check for bad alignment of destination */ + movl %edi,%ecx + andl $7,%ecx + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %r8d,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + .section __ex_table,"a" + .align 8 + .quad 100b,103b + .quad 101b,103b + .previous +#endif + .endm + +/* Standard copy_to_user with segment limit checking */ ENTRY(copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx addq %rdx,%rcx - jc bad_to_user - cmpq threadinfo_addr_limit(%rax),%rcx + jc bad_to_user + cmpq TI_addr_limit(%rax),%rcx jae bad_to_user - xorl %eax,%eax /* clear zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENTRY(copy_user_generic) +/* Standard copy_from_user with segment limit checking */ +ENTRY(copy_from_user) CFI_STARTPROC - movl $1,%ecx /* set zero flag */ + GET_THREAD_INFO(%rax) + movq %rsi,%rcx + addq %rdx,%rcx + jc bad_from_user + cmpq TI_addr_limit(%rax),%rcx + jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC +ENDPROC(copy_from_user) -ENTRY(__copy_from_user_inatomic) +ENTRY(copy_user_generic) CFI_STARTPROC - xorl %ecx,%ecx /* clear zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC +ENDPROC(copy_user_generic) -/* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(__copy_from_user_inatomic) CFI_STARTPROC - GET_THREAD_INFO(%rax) - movq %rsi,%rcx - addq %rdx,%rcx - jc bad_from_user - cmpq threadinfo_addr_limit(%rax),%rcx - jae bad_from_user - movl $1,%ecx /* set zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) - +ENDPROC(__copy_from_user_inatomic) + .section .fixup,"ax" /* must zero dest */ +ENTRY(bad_from_user) bad_from_user: CFI_STARTPROC movl %edx,%ecx @@ -81,271 +111,158 @@ bad_from_user: rep stosb bad_to_user: - movl %edx,%eax + movl %edx,%eax ret CFI_ENDPROC -END(bad_from_user) +ENDPROC(bad_from_user) .previous - - + /* * copy_user_generic_unrolled - memory copy with exception handling. - * This version is for CPUs like P4 that don't have efficient micro code for rep movsq - * - * Input: + * This version is for CPUs like P4 that don't have efficient micro + * code for rep movsq + * + * Input: * rdi destination * rsi source * rdx count - * ecx zero flag -- if true zero destination on error * - * Output: - * eax uncopied bytes or 0 if successful. + * Output: + * eax uncopied bytes or 0 if successfull. */ ENTRY(copy_user_generic_unrolled) CFI_STARTPROC - pushq %rbx - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rbx, 0 - pushq %rcx - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rcx, 0 - xorl %eax,%eax /*zero for the exception handler */ - -#ifdef FIX_ALIGNMENT - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jnz .Lbad_alignment -.Lafter_bad_alignment: -#endif - - movq %rdx,%rcx - - movl $64,%ebx - shrq $6,%rdx - decq %rdx - js .Lhandle_tail - - .p2align 4 -.Lloop: -.Ls1: movq (%rsi),%r11 -.Ls2: movq 1*8(%rsi),%r8 -.Ls3: movq 2*8(%rsi),%r9 -.Ls4: movq 3*8(%rsi),%r10 -.Ld1: movq %r11,(%rdi) -.Ld2: movq %r8,1*8(%rdi) -.Ld3: movq %r9,2*8(%rdi) -.Ld4: movq %r10,3*8(%rdi) - -.Ls5: movq 4*8(%rsi),%r11 -.Ls6: movq 5*8(%rsi),%r8 -.Ls7: movq 6*8(%rsi),%r9 -.Ls8: movq 7*8(%rsi),%r10 -.Ld5: movq %r11,4*8(%rdi) -.Ld6: movq %r8,5*8(%rdi) -.Ld7: movq %r9,6*8(%rdi) -.Ld8: movq %r10,7*8(%rdi) - - decq %rdx - + cmpl $8,%edx + jb 20f /* less then 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx + jz 17f +1: movq (%rsi),%r8 +2: movq 1*8(%rsi),%r9 +3: movq 2*8(%rsi),%r10 +4: movq 3*8(%rsi),%r11 +5: movq %r8,(%rdi) +6: movq %r9,1*8(%rdi) +7: movq %r10,2*8(%rdi) +8: movq %r11,3*8(%rdi) +9: movq 4*8(%rsi),%r8 +10: movq 5*8(%rsi),%r9 +11: movq 6*8(%rsi),%r10 +12: movq 7*8(%rsi),%r11 +13: movq %r8,4*8(%rdi) +14: movq %r9,5*8(%rdi) +15: movq %r10,6*8(%rdi) +16: movq %r11,7*8(%rdi) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi - - jns .Lloop - - .p2align 4 -.Lhandle_tail: - movl %ecx,%edx - andl $63,%ecx - shrl $3,%ecx - jz .Lhandle_7 - movl $8,%ebx - .p2align 4 -.Lloop_8: -.Ls9: movq (%rsi),%r8 -.Ld9: movq %r8,(%rdi) decl %ecx - leaq 8(%rdi),%rdi + jnz 1b +17: movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +18: movq (%rsi),%r8 +19: movq %r8,(%rdi) leaq 8(%rsi),%rsi - jnz .Lloop_8 - -.Lhandle_7: + leaq 8(%rdi),%rdi + decl %ecx + jnz 18b +20: andl %edx,%edx + jz 23f movl %edx,%ecx - andl $7,%ecx - jz .Lende - .p2align 4 -.Lloop_1: -.Ls10: movb (%rsi),%bl -.Ld10: movb %bl,(%rdi) - incq %rdi +21: movb (%rsi),%al +22: movb %al,(%rdi) incq %rsi + incq %rdi decl %ecx - jnz .Lloop_1 - - CFI_REMEMBER_STATE -.Lende: - popq %rcx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE rcx - popq %rbx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE rbx + jnz 21b +23: xor %eax,%eax ret - CFI_RESTORE_STATE -#ifdef FIX_ALIGNMENT - /* align destination */ - .p2align 4 -.Lbad_alignment: - movl $8,%r9d - subl %ecx,%r9d - movl %r9d,%ecx - cmpq %r9,%rdx - jz .Lhandle_7 - js .Lhandle_7 -.Lalign_1: -.Ls11: movb (%rsi),%bl -.Ld11: movb %bl,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .Lalign_1 - subq %r9,%rdx - jmp .Lafter_bad_alignment -#endif + .section .fixup,"ax" +30: shll $6,%ecx + addl %ecx,%edx + jmp 60f +40: leal (%edx,%ecx,8),%edx + jmp 60f +50: movl %ecx,%edx +60: jmp copy_user_handle_tail /* ecx is zerorest also */ + .previous - /* table sorted by exception address */ .section __ex_table,"a" .align 8 - .quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */ - .quad .Ls2,.Ls1e - .quad .Ls3,.Ls1e - .quad .Ls4,.Ls1e - .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */ - .quad .Ld2,.Ls2e - .quad .Ld3,.Ls3e - .quad .Ld4,.Ls4e - .quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */ - .quad .Ls6,.Ls5e - .quad .Ls7,.Ls5e - .quad .Ls8,.Ls5e - .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */ - .quad .Ld6,.Ls6e - .quad .Ld7,.Ls7e - .quad .Ld8,.Ls8e - .quad .Ls9,.Le_quad - .quad .Ld9,.Le_quad - .quad .Ls10,.Le_byte - .quad .Ld10,.Le_byte -#ifdef FIX_ALIGNMENT - .quad .Ls11,.Lzero_rest - .quad .Ld11,.Lzero_rest -#endif - .quad .Le5,.Le_zero + .quad 1b,30b + .quad 2b,30b + .quad 3b,30b + .quad 4b,30b + .quad 5b,30b + .quad 6b,30b + .quad 7b,30b + .quad 8b,30b + .quad 9b,30b + .quad 10b,30b + .quad 11b,30b + .quad 12b,30b + .quad 13b,30b + .quad 14b,30b + .quad 15b,30b + .quad 16b,30b + .quad 18b,40b + .quad 19b,40b + .quad 21b,50b + .quad 22b,50b .previous - - /* eax: zero, ebx: 64 */ -.Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */ -.Ls2e: addl $8,%eax -.Ls3e: addl $8,%eax -.Ls4e: addl $8,%eax -.Ls5e: addl $8,%eax -.Ls6e: addl $8,%eax -.Ls7e: addl $8,%eax -.Ls8e: addl $8,%eax - addq %rbx,%rdi /* +64 */ - subq %rax,%rdi /* correct destination with computed offset */ - - shlq $6,%rdx /* loop counter * 64 (stride length) */ - addq %rax,%rdx /* add offset to loopcnt */ - andl $63,%ecx /* remaining bytes */ - addq %rcx,%rdx /* add them */ - jmp .Lzero_rest - - /* exception on quad word loop in tail handling */ - /* ecx: loopcnt/8, %edx: length, rdi: correct */ -.Le_quad: - shll $3,%ecx - andl $7,%edx - addl %ecx,%edx - /* edx: bytes to zero, rdi: dest, eax:zero */ -.Lzero_rest: - cmpl $0,(%rsp) - jz .Le_zero - movq %rdx,%rcx -.Le_byte: - xorl %eax,%eax -.Le5: rep - stosb - /* when there is another exception while zeroing the rest just return */ -.Le_zero: - movq %rdx,%rax - jmp .Lende CFI_ENDPROC -ENDPROC(copy_user_generic) +ENDPROC(copy_user_generic_unrolled) - - /* Some CPUs run faster using the string copy instructions. - This is also a lot simpler. Use them when possible. - Patch in jmps to this code instead of copying it fully - to avoid unwanted aliasing in the exception tables. */ - - /* rdi destination - * rsi source - * rdx count - * ecx zero flag - * - * Output: - * eax uncopied bytes or 0 if successfull. - * - * Only 4GB of copy is supported. This shouldn't be a problem - * because the kernel normally only writes from/to page sized chunks - * even if user space passed a longer buffer. - * And more would be dangerous because both Intel and AMD have - * errata with rep movsq > 4GB. If someone feels the need to fix - * this please consider this. - */ +/* Some CPUs run faster using the string copy instructions. + * This is also a lot simpler. Use them when possible. + * + * Only 4GB of copy is supported. This shouldn't be a problem + * because the kernel normally only writes from/to page sized chunks + * even if user space passed a longer buffer. + * And more would be dangerous because both Intel and AMD have + * errata with rep movsq > 4GB. If someone feels the need to fix + * this please consider this. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * eax uncopied bytes or 0 if successful. + */ ENTRY(copy_user_generic_string) CFI_STARTPROC - movl %ecx,%r8d /* save zero flag */ + andl %edx,%edx + jz 4f + cmpl $8,%edx + jb 2f /* less than 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION movl %edx,%ecx shrl $3,%ecx - andl $7,%edx - jz 10f -1: rep - movsq - movl %edx,%ecx -2: rep - movsb -9: movl %ecx,%eax - ret - - /* multiple of 8 byte */ -10: rep + andl $7,%edx +1: rep movsq - xor %eax,%eax +2: movl %edx,%ecx +3: rep + movsb +4: xorl %eax,%eax ret - /* exception handling */ -3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ - jmp 6f -5: movl %ecx,%eax /* exception on byte loop */ - /* eax: left over bytes */ -6: testl %r8d,%r8d /* zero flag set? */ - jz 7f - movl %eax,%ecx /* initialize x86 loop counter */ - push %rax - xorl %eax,%eax -8: rep - stosb /* zero the rest */ -11: pop %rax -7: ret - CFI_ENDPROC -END(copy_user_generic_c) + .section .fixup,"ax" +11: leal (%edx,%ecx,8),%ecx +12: movl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous .section __ex_table,"a" - .quad 1b,3b - .quad 2b,5b - .quad 8b,11b - .quad 10b,3b + .align 8 + .quad 1b,11b + .quad 3b,12b .previous + CFI_ENDPROC +ENDPROC(copy_user_generic_string) diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 9d3d1ab83763..93353d6a5263 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -1,4 +1,6 @@ -/* Copyright 2002 Andi Kleen, SuSE Labs. +/* + * Copyright 2008 Vitaly Mayatskikh <[email protected]> + * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU Public License v2. * * Functions to copy from and to user space. @@ -12,204 +14,125 @@ #include <asm/current.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/cpufeature.h> - -/* - * copy_user_nocache - Uncached memory copy with exception handling - * This will force destination/source out of cache for more performance. - * - * Input: - * rdi destination - * rsi source - * rdx count - * rcx zero flag when 1 zero on exception - * - * Output: - * eax uncopied bytes or 0 if successful. - */ -ENTRY(__copy_user_nocache) - CFI_STARTPROC - pushq %rbx - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rbx, 0 - pushq %rcx /* save zero flag */ - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rcx, 0 - - xorl %eax,%eax /* zero for the exception handler */ + .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT /* check for bad alignment of destination */ movl %edi,%ecx andl $7,%ecx - jnz .Lbad_alignment -.Lafter_bad_alignment: -#endif - - movq %rdx,%rcx - - movl $64,%ebx - shrq $6,%rdx - decq %rdx - js .Lhandle_tail - - .p2align 4 -.Lloop: -.Ls1: movq (%rsi),%r11 -.Ls2: movq 1*8(%rsi),%r8 -.Ls3: movq 2*8(%rsi),%r9 -.Ls4: movq 3*8(%rsi),%r10 -.Ld1: movnti %r11,(%rdi) -.Ld2: movnti %r8,1*8(%rdi) -.Ld3: movnti %r9,2*8(%rdi) -.Ld4: movnti %r10,3*8(%rdi) - -.Ls5: movq 4*8(%rsi),%r11 -.Ls6: movq 5*8(%rsi),%r8 -.Ls7: movq 6*8(%rsi),%r9 -.Ls8: movq 7*8(%rsi),%r10 -.Ld5: movnti %r11,4*8(%rdi) -.Ld6: movnti %r8,5*8(%rdi) -.Ld7: movnti %r9,6*8(%rdi) -.Ld8: movnti %r10,7*8(%rdi) + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %r8d,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous - dec %rdx + .section __ex_table,"a" + .align 8 + .quad 100b,103b + .quad 101b,103b + .previous +#endif + .endm +/* + * copy_user_nocache - Uncached memory copy with exception handling + * This will force destination/source out of cache for more performance. + */ +ENTRY(__copy_user_nocache) + CFI_STARTPROC + cmpl $8,%edx + jb 20f /* less then 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx + jz 17f +1: movq (%rsi),%r8 +2: movq 1*8(%rsi),%r9 +3: movq 2*8(%rsi),%r10 +4: movq 3*8(%rsi),%r11 +5: movnti %r8,(%rdi) +6: movnti %r9,1*8(%rdi) +7: movnti %r10,2*8(%rdi) +8: movnti %r11,3*8(%rdi) +9: movq 4*8(%rsi),%r8 +10: movq 5*8(%rsi),%r9 +11: movq 6*8(%rsi),%r10 +12: movq 7*8(%rsi),%r11 +13: movnti %r8,4*8(%rdi) +14: movnti %r9,5*8(%rdi) +15: movnti %r10,6*8(%rdi) +16: movnti %r11,7*8(%rdi) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi - - jns .Lloop - - .p2align 4 -.Lhandle_tail: - movl %ecx,%edx - andl $63,%ecx - shrl $3,%ecx - jz .Lhandle_7 - movl $8,%ebx - .p2align 4 -.Lloop_8: -.Ls9: movq (%rsi),%r8 -.Ld9: movnti %r8,(%rdi) decl %ecx - leaq 8(%rdi),%rdi + jnz 1b +17: movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +18: movq (%rsi),%r8 +19: movnti %r8,(%rdi) leaq 8(%rsi),%rsi - jnz .Lloop_8 - -.Lhandle_7: + leaq 8(%rdi),%rdi + decl %ecx + jnz 18b +20: andl %edx,%edx + jz 23f movl %edx,%ecx - andl $7,%ecx - jz .Lende - .p2align 4 -.Lloop_1: -.Ls10: movb (%rsi),%bl -.Ld10: movb %bl,(%rdi) - incq %rdi +21: movb (%rsi),%al +22: movb %al,(%rdi) incq %rsi + incq %rdi decl %ecx - jnz .Lloop_1 - - CFI_REMEMBER_STATE -.Lende: - popq %rcx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE %rcx - popq %rbx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE rbx + jnz 21b +23: xorl %eax,%eax sfence ret - CFI_RESTORE_STATE -#ifdef FIX_ALIGNMENT - /* align destination */ - .p2align 4 -.Lbad_alignment: - movl $8,%r9d - subl %ecx,%r9d - movl %r9d,%ecx - cmpq %r9,%rdx - jz .Lhandle_7 - js .Lhandle_7 -.Lalign_1: -.Ls11: movb (%rsi),%bl -.Ld11: movb %bl,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .Lalign_1 - subq %r9,%rdx - jmp .Lafter_bad_alignment -#endif + .section .fixup,"ax" +30: shll $6,%ecx + addl %ecx,%edx + jmp 60f +40: leal (%edx,%ecx,8),%edx + jmp 60f +50: movl %ecx,%edx +60: sfence + movl %r8d,%ecx + jmp copy_user_handle_tail + .previous - /* table sorted by exception address */ .section __ex_table,"a" - .align 8 - .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */ - .quad .Ls2,.Ls1e - .quad .Ls3,.Ls1e - .quad .Ls4,.Ls1e - .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */ - .quad .Ld2,.Ls2e - .quad .Ld3,.Ls3e - .quad .Ld4,.Ls4e - .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */ - .quad .Ls6,.Ls5e - .quad .Ls7,.Ls5e - .quad .Ls8,.Ls5e - .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */ - .quad .Ld6,.Ls6e - .quad .Ld7,.Ls7e - .quad .Ld8,.Ls8e - .quad .Ls9,.Le_quad - .quad .Ld9,.Le_quad - .quad .Ls10,.Le_byte - .quad .Ld10,.Le_byte -#ifdef FIX_ALIGNMENT - .quad .Ls11,.Lzero_rest - .quad .Ld11,.Lzero_rest -#endif - .quad .Le5,.Le_zero + .quad 1b,30b + .quad 2b,30b + .quad 3b,30b + .quad 4b,30b + .quad 5b,30b + .quad 6b,30b + .quad 7b,30b + .quad 8b,30b + .quad 9b,30b + .quad 10b,30b + .quad 11b,30b + .quad 12b,30b + .quad 13b,30b + .quad 14b,30b + .quad 15b,30b + .quad 16b,30b + .quad 18b,40b + .quad 19b,40b + .quad 21b,50b + .quad 22b,50b .previous - - /* eax: zero, ebx: 64 */ -.Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */ -.Ls2e: addl $8,%eax -.Ls3e: addl $8,%eax -.Ls4e: addl $8,%eax -.Ls5e: addl $8,%eax -.Ls6e: addl $8,%eax -.Ls7e: addl $8,%eax -.Ls8e: addl $8,%eax - addq %rbx,%rdi /* +64 */ - subq %rax,%rdi /* correct destination with computed offset */ - - shlq $6,%rdx /* loop counter * 64 (stride length) */ - addq %rax,%rdx /* add offset to loopcnt */ - andl $63,%ecx /* remaining bytes */ - addq %rcx,%rdx /* add them */ - jmp .Lzero_rest - - /* exception on quad word loop in tail handling */ - /* ecx: loopcnt/8, %edx: length, rdi: correct */ -.Le_quad: - shll $3,%ecx - andl $7,%edx - addl %ecx,%edx - /* edx: bytes to zero, rdi: dest, eax:zero */ -.Lzero_rest: - cmpl $0,(%rsp) /* zero flag set? */ - jz .Le_zero - movq %rdx,%rcx -.Le_byte: - xorl %eax,%eax -.Le5: rep - stosb - /* when there is another exception while zeroing the rest just return */ -.Le_zero: - movq %rdx,%rax - jmp .Lende CFI_ENDPROC ENDPROC(__copy_user_nocache) - - diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay.c index d710f2d167bb..f4568605d7d5 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay.c @@ -3,6 +3,7 @@ * * Copyright (C) 1993 Linus Torvalds * Copyright (C) 1997 Martin Mares <[email protected]> + * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com> * * The __delay function must _NOT_ be inlined as its execution time * depends wildly on alignment on many x86 processors. The additional @@ -28,16 +29,22 @@ /* simple loop based delay: */ static void delay_loop(unsigned long loops) { - int d0; - - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); + asm volatile( + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: dec %0 \n" + " jnz 2b \n" + "3: dec %0 \n" + + : /* we don't need output */ + :"a" (loops) + ); } /* TSC based delay: */ @@ -91,7 +98,7 @@ void use_tsc_delay(void) int __devinit read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { - rdtscl(*timer_val); + rdtscll(*timer_val); return 0; } return -1; @@ -101,31 +108,30 @@ void __delay(unsigned long loops) { delay_fn(loops); } +EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { int d0; xloops *= 4; - __asm__("mull %0" + asm("mull %%edx" :"=d" (xloops), "=&a" (d0) :"1" (xloops), "0" (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); __delay(++xloops); } +EXPORT_SYMBOL(__const_udelay); void __udelay(unsigned long usecs) { __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ } +EXPORT_SYMBOL(__udelay); void __ndelay(unsigned long nsecs) { __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } - -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); -EXPORT_SYMBOL(__udelay); EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c deleted file mode 100644 index 4c441be92641..000000000000 --- a/arch/x86/lib/delay_64.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Precise Delay Loops for x86-64 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares <[email protected]> - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. - */ - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/timex.h> -#include <linux/preempt.h> -#include <linux/delay.h> -#include <linux/init.h> - -#include <asm/delay.h> -#include <asm/msr.h> - -#ifdef CONFIG_SMP -#include <asm/smp.h> -#endif - -int __devinit read_current_timer(unsigned long *timer_value) -{ - rdtscll(*timer_value); - return 0; -} - -void __delay(unsigned long loops) -{ - unsigned bclock, now; - int cpu; - - preempt_disable(); - cpu = smp_processor_id(); - rdtscl(bclock); - for (;;) { - rdtscl(now); - if ((now - bclock) >= loops) - break; - - /* Allow RT tasks to run */ - preempt_enable(); - rep_nop(); - preempt_disable(); - - /* - * It is possible that we moved to another CPU, and - * since TSC's are per-cpu we need to calculate - * that. The delay must guarantee that we wait "at - * least" the amount of time. Being moved to another - * CPU could make the wait longer but we just need to - * make sure we waited long enough. Rebalance the - * counter for this CPU. - */ - if (unlikely(cpu != smp_processor_id())) { - loops -= (now - bclock); - cpu = smp_processor_id(); - rdtscl(bclock); - } - } - preempt_enable(); -} -EXPORT_SYMBOL(__delay); - -inline void __const_udelay(unsigned long xloops) -{ - __delay(((xloops * HZ * - cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1); -} -EXPORT_SYMBOL(__const_udelay); - -void __udelay(unsigned long usecs) -{ - __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ -} -EXPORT_SYMBOL(__udelay); - -void __ndelay(unsigned long nsecs) -{ - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ -} -EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser.S index 5448876261f8..ad374003742f 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser.S @@ -3,6 +3,7 @@ * * (C) Copyright 1998 Linus Torvalds * (C) Copyright 2005 Andi Kleen + * (C) Copyright 2008 Glauber Costa * * These functions have a non-standard call interface * to make them more efficient, especially as they @@ -13,14 +14,13 @@ /* * __get_user_X * - * Inputs: %rcx contains the address. + * Inputs: %[r|e]ax contains the address. * The register is modified, but all changes are undone * before returning because the C code doesn't know about it. * - * Outputs: %rax is error code (0 or -EFAULT) - * %rdx contains zero-extended value - * - * %r8 is destroyed. + * Outputs: %[r|e]ax is error code (0 or -EFAULT) + * %[r|e]dx contains zero-extended value + * * * These functions should not modify any other registers, * as they get called from within inline assembly. @@ -32,78 +32,73 @@ #include <asm/errno.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> +#include <asm/asm.h> .text ENTRY(__get_user_1) CFI_STARTPROC - GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -1: movzb (%rcx),%edx - xorl %eax,%eax +1: movzb (%_ASM_AX),%edx + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $1,%rcx - jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx - jae 20f - decq %rcx -2: movzwl (%rcx),%edx - xorl %eax,%eax + add $1,%_ASM_AX + jc bad_get_user + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user +2: movzwl -1(%_ASM_AX),%edx + xor %eax,%eax ret -20: decq %rcx - jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $3,%rcx - jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx - jae 30f - subq $3,%rcx -3: movl (%rcx),%edx - xorl %eax,%eax + add $3,%_ASM_AX + jc bad_get_user + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user +3: mov -3(%_ASM_AX),%edx + xor %eax,%eax ret -30: subq $3,%rcx - jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_4) +#ifdef CONFIG_X86_64 ENTRY(__get_user_8) CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $7,%rcx - jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx - jae 40f - subq $7,%rcx -4: movq (%rcx),%rdx - xorl %eax,%eax + add $7,%_ASM_AX + jc bad_get_user + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user +4: movq -7(%_ASM_AX),%_ASM_DX + xor %eax,%eax ret -40: subq $7,%rcx - jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_8) +#endif bad_get_user: CFI_STARTPROC - xorl %edx,%edx - movq $(-EFAULT),%rax + xor %edx,%edx + mov $(-EFAULT),%_ASM_AX ret CFI_ENDPROC END(bad_get_user) .section __ex_table,"a" - .quad 1b,bad_get_user - .quad 2b,bad_get_user - .quad 3b,bad_get_user - .quad 4b,bad_get_user -.previous + _ASM_PTR 1b,bad_get_user + _ASM_PTR 2b,bad_get_user + _ASM_PTR 3b,bad_get_user +#ifdef CONFIG_X86_64 + _ASM_PTR 4b,bad_get_user +#endif diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S deleted file mode 100644 index 6d84b53f12a2..000000000000 --- a/arch/x86/lib/getuser_32.S +++ /dev/null @@ -1,78 +0,0 @@ -/* - * __get_user functions. - * - * (C) Copyright 1998 Linus Torvalds - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ -#include <linux/linkage.h> -#include <asm/dwarf2.h> -#include <asm/thread_info.h> - - -/* - * __get_user_X - * - * Inputs: %eax contains the address - * - * Outputs: %eax is error code (0 or -EFAULT) - * %edx contains zero-extended value - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. - */ - -.text -ENTRY(__get_user_1) - CFI_STARTPROC - GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax - jae bad_get_user -1: movzbl (%eax),%edx - xorl %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_1) - -ENTRY(__get_user_2) - CFI_STARTPROC - addl $1,%eax - jc bad_get_user - GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax - jae bad_get_user -2: movzwl -1(%eax),%edx - xorl %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_2) - -ENTRY(__get_user_4) - CFI_STARTPROC - addl $3,%eax - jc bad_get_user - GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax - jae bad_get_user -3: movl -3(%eax),%edx - xorl %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_4) - -bad_get_user: - CFI_STARTPROC - xorl %edx,%edx - movl $-14,%eax - ret - CFI_ENDPROC -END(bad_get_user) - -.section __ex_table,"a" - .long 1b,bad_get_user - .long 2b,bad_get_user - .long 3b,bad_get_user -.previous diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser.S index f58fba109d18..36b0d15ae6e9 100644 --- a/arch/x86/lib/putuser_32.S +++ b/arch/x86/lib/putuser.S @@ -2,6 +2,8 @@ * __put_user functions. * * (C) Copyright 2005 Linus Torvalds + * (C) Copyright 2005 Andi Kleen + * (C) Copyright 2008 Glauber Costa * * These functions have a non-standard call interface * to make them more efficient, especially as they @@ -11,6 +13,8 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/thread_info.h> +#include <asm/errno.h> +#include <asm/asm.h> /* @@ -26,73 +30,68 @@ */ #define ENTER CFI_STARTPROC ; \ - pushl %ebx ; \ - CFI_ADJUST_CFA_OFFSET 4 ; \ - CFI_REL_OFFSET ebx, 0 ; \ - GET_THREAD_INFO(%ebx) -#define EXIT popl %ebx ; \ - CFI_ADJUST_CFA_OFFSET -4 ; \ - CFI_RESTORE ebx ; \ - ret ; \ + GET_THREAD_INFO(%_ASM_BX) +#define EXIT ret ; \ CFI_ENDPROC .text ENTRY(__put_user_1) ENTER - cmpl TI_addr_limit(%ebx),%ecx + cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user -1: movb %al,(%ecx) - xorl %eax,%eax +1: movb %al,(%_ASM_CX) + xor %eax,%eax EXIT ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER - movl TI_addr_limit(%ebx),%ebx - subl $1,%ebx - cmpl %ebx,%ecx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $1,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX jae bad_put_user -2: movw %ax,(%ecx) - xorl %eax,%eax +2: movw %ax,(%_ASM_CX) + xor %eax,%eax EXIT ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER - movl TI_addr_limit(%ebx),%ebx - subl $3,%ebx - cmpl %ebx,%ecx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $3,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX jae bad_put_user -3: movl %eax,(%ecx) - xorl %eax,%eax +3: movl %eax,(%_ASM_CX) + xor %eax,%eax EXIT ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER - movl TI_addr_limit(%ebx),%ebx - subl $7,%ebx - cmpl %ebx,%ecx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $7,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX jae bad_put_user -4: movl %eax,(%ecx) -5: movl %edx,4(%ecx) - xorl %eax,%eax +4: mov %_ASM_AX,(%_ASM_CX) +#ifdef CONFIG_X86_32 +5: movl %edx,4(%_ASM_CX) +#endif + xor %eax,%eax EXIT ENDPROC(__put_user_8) bad_put_user: - CFI_STARTPROC simple - CFI_DEF_CFA esp, 2*4 - CFI_OFFSET eip, -1*4 - CFI_OFFSET ebx, -2*4 - movl $-14,%eax + CFI_STARTPROC + movl $-EFAULT,%eax EXIT END(bad_put_user) .section __ex_table,"a" - .long 1b,bad_put_user - .long 2b,bad_put_user - .long 3b,bad_put_user - .long 4b,bad_put_user - .long 5b,bad_put_user + _ASM_PTR 1b,bad_put_user + _ASM_PTR 2b,bad_put_user + _ASM_PTR 3b,bad_put_user + _ASM_PTR 4b,bad_put_user +#ifdef CONFIG_X86_32 + _ASM_PTR 5b,bad_put_user +#endif .previous diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S deleted file mode 100644 index 4989f5a8fa9b..000000000000 --- a/arch/x86/lib/putuser_64.S +++ /dev/null @@ -1,106 +0,0 @@ -/* - * __put_user functions. - * - * (C) Copyright 1998 Linus Torvalds - * (C) Copyright 2005 Andi Kleen - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ - -/* - * __put_user_X - * - * Inputs: %rcx contains the address - * %rdx contains new value - * - * Outputs: %rax is error code (0 or -EFAULT) - * - * %r8 is destroyed. - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. - */ - -#include <linux/linkage.h> -#include <asm/dwarf2.h> -#include <asm/page.h> -#include <asm/errno.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> - - .text -ENTRY(__put_user_1) - CFI_STARTPROC - GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx - jae bad_put_user -1: movb %dl,(%rcx) - xorl %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__put_user_1) - -ENTRY(__put_user_2) - CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $1,%rcx - jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx - jae 20f - decq %rcx -2: movw %dx,(%rcx) - xorl %eax,%eax - ret -20: decq %rcx - jmp bad_put_user - CFI_ENDPROC -ENDPROC(__put_user_2) - -ENTRY(__put_user_4) - CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $3,%rcx - jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx - jae 30f - subq $3,%rcx -3: movl %edx,(%rcx) - xorl %eax,%eax - ret -30: subq $3,%rcx - jmp bad_put_user - CFI_ENDPROC -ENDPROC(__put_user_4) - -ENTRY(__put_user_8) - CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $7,%rcx - jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx - jae 40f - subq $7,%rcx -4: movq %rdx,(%rcx) - xorl %eax,%eax - ret -40: subq $7,%rcx - jmp bad_put_user - CFI_ENDPROC -ENDPROC(__put_user_8) - -bad_put_user: - CFI_STARTPROC - movq $(-EFAULT),%rax - ret - CFI_ENDPROC -END(bad_put_user) - -.section __ex_table,"a" - .quad 1b,bad_put_user - .quad 2b,bad_put_user - .quad 3b,bad_put_user - .quad 4b,bad_put_user -.previous diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0c89d1bb0287..f4df6e7c718b 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -158,3 +158,26 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le } EXPORT_SYMBOL(copy_in_user); +/* + * Try to copy last bytes and clear the rest if needed. + * Since protection fault in copy_from/to_user is not a normal situation, + * it is not necessary to optimize tail handling. + */ +unsigned long +copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) +{ + char c; + unsigned zero_len; + + for (; len; --len) { + if (__get_user_nocheck(c, from++, sizeof(char))) + break; + if (__put_user_nocheck(c, to++, sizeof(char))) + break; + } + + for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) + if (__put_user_nocheck(c, to++, sizeof(char))) + break; + return len; +} diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 0c28a071824c..2f5e277686b8 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -142,45 +142,3 @@ static int __init print_ipi_mode(void) late_initcall(print_ipi_mode); -/** - * machine_specific_memory_setup - Hook for machine specific memory setup. - * - * Description: - * This is included late in kernel/setup.c so that it can make - * use of all of the static functions. - **/ - -char * __init machine_specific_memory_setup(void) -{ - char *who; - - - who = "BIOS-e820"; - - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); - if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) - < 0) { - unsigned long mem_size; - - /* compare results from other methods and take the greater */ - if (boot_params.alt_mem_k - < boot_params.screen_info.ext_mem_k) { - mem_size = boot_params.screen_info.ext_mem_k; - who = "BIOS-88"; - } else { - mem_size = boot_params.alt_mem_k; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } - return who; -} diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile index 69dd4da218dc..3ef8b43b62fc 100644 --- a/arch/x86/mach-es7000/Makefile +++ b/arch/x86/mach-es7000/Makefile @@ -3,4 +3,3 @@ # obj-$(CONFIG_X86_ES7000) := es7000plat.o -obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index f5d6f7d8b86e..4354ce804889 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c @@ -52,6 +52,8 @@ static struct mip_reg *host_reg; static int mip_port; static unsigned long mip_addr, host_addr; +int es7000_plat; + /* * GSI override for ES7000 platforms. */ @@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr) } #endif -/* - * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic - * arch already has got following function definitions (asm-generic/es7000.c) - * hence no need to define these for that case. - */ -#ifndef CONFIG_X86_GENERICARCH -void es7000_sw_apic(void); -void __init enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - -__init int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (mpc->mpc_oemptr) { - struct mp_config_oemtable *oem_table = - (struct mp_config_oemtable *)mpc->mpc_oemptr; - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr; - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (es7000_check_dsdt()) - return parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - return 1; - } - } - return 0; -} -#else -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif -#endif /* COFIG_X86_GENERICARCH */ - static void es7000_spin(int n) { diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 19d6d407737b..0dbd7803a1d5 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -2,7 +2,11 @@ # Makefile for the generic architecture # -EXTRA_CFLAGS := -Iarch/x86/kernel +EXTRA_CFLAGS := -Iarch/x86/kernel -obj-y := probe.o summit.o bigsmp.o es7000.o default.o -obj-y += ../../x86/mach-es7000/ +obj-y := probe.o default.o +obj-$(CONFIG_X86_NUMAQ) += numaq.o +obj-$(CONFIG_X86_SUMMIT) += summit.o +obj-$(CONFIG_X86_BIGSMP) += bigsmp.o +obj-$(CONFIG_X86_ES7000) += es7000.o +obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 95fc463056d0..59d771714559 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */ static int hp_ht_bigsmp(const struct dmi_system_id *d) { -#ifdef CONFIG_X86_GENERICARCH printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); dmi_bigsmp = 1; -#endif return 0; } @@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { static int probe_bigsmp(void) { if (def_to_bigsmp) - dmi_bigsmp = 1; + dmi_bigsmp = 1; else dmi_check_system(bigsmp_dmi_table); return dmi_bigsmp; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c new file mode 100644 index 000000000000..8091e68764c4 --- /dev/null +++ b/arch/x86/mach-generic/numaq.c @@ -0,0 +1,41 @@ +/* + * APIC driver for the IBM NUMAQ chipset. + */ +#define APIC_DEFINITION 1 +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/smp.h> +#include <asm/mpspec.h> +#include <asm/genapic.h> +#include <asm/fixmap.h> +#include <asm/apicdef.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <asm/mach-numaq/mach_apic.h> +#include <asm/mach-numaq/mach_apicdef.h> +#include <asm/mach-numaq/mach_ipi.h> +#include <asm/mach-numaq/mach_mpparse.h> +#include <asm/mach-numaq/mach_wakecpu.h> +#include <asm/numaq.h> + +static int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + numaq_mps_oem_check(mpc, oem, productid); + return found_numaq; +} + +static int probe_numaq(void) +{ + /* already know from get_memcfg_numaq() */ + return found_numaq; +} + +/* Hook from generic ACPI tables.c */ +static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} + +struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index c5ae751b994a..5a7e4619e1c4 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -16,6 +16,7 @@ #include <asm/apicdef.h> #include <asm/genapic.h> +extern struct genapic apic_numaq; extern struct genapic apic_summit; extern struct genapic apic_bigsmp; extern struct genapic apic_es7000; @@ -24,9 +25,18 @@ extern struct genapic apic_default; struct genapic *genapic = &apic_default; static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_NUMAQ + &apic_numaq, +#endif +#ifdef CONFIG_X86_SUMMIT &apic_summit, +#endif +#ifdef CONFIG_X86_BIGSMP &apic_bigsmp, +#endif +#ifdef CONFIG_X86_ES7000 &apic_es7000, +#endif &apic_default, /* must be last */ NULL, }; @@ -54,6 +64,7 @@ early_param("apic", parse_apic); void __init generic_bigsmp_probe(void) { +#ifdef CONFIG_X86_BIGSMP /* * This routine is used to switch to bigsmp mode when * - There is no apic= option specified by the user @@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void) printk(KERN_INFO "Overriding APIC driver with %s\n", genapic->name); } +#endif } void __init generic_apic_probe(void) @@ -88,7 +100,8 @@ void __init generic_apic_probe(void) /* These functions can switch the APIC even after the initial ->probe() */ -int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) +int __init mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) { int i; for (i = 0; apic_probe[i]; ++i) { diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c index 57484e91ab90..a2fb78c0d154 100644 --- a/arch/x86/mach-visws/mpparse.c +++ b/arch/x86/mach-visws/mpparse.c @@ -8,11 +8,6 @@ #include "cobalt.h" #include "mach_apic.h" -/* Have we found an MP table */ -int smp_found_config; - -int pic_mode; - extern unsigned int __cpuinitdata maxcpus; /* @@ -76,7 +71,9 @@ void __init find_smp_config(void) if (ncpus > maxcpus) ncpus = maxcpus; +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; +#endif while (ncpus--) MP_processor_info(mp++); diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c index de4c9dbd086f..d67868ec9b7f 100644 --- a/arch/x86/mach-visws/setup.c +++ b/arch/x86/mach-visws/setup.c @@ -175,9 +175,9 @@ char * __init machine_specific_memory_setup(void) sgivwfb_mem_size &= ~((1 << 20) - 1); sgivwfb_mem_phys = mem_size - gfx_mem_size; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); - add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); + e820_add_region(0, LOWMEMSIZE(), E820_RAM); + e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); + e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); return "PROM"; } diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c index cef9cb1d15ac..d8b2cfd85d92 100644 --- a/arch/x86/mach-visws/visws_apic.c +++ b/arch/x86/mach-visws/visws_apic.c @@ -21,10 +21,9 @@ #include <asm/io.h> #include <asm/apic.h> #include <asm/i8259.h> +#include <asm/irq_vectors.h> #include "cobalt.h" -#include "irq_vectors.h" - static DEFINE_SPINLOCK(cobalt_lock); diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 5ae5466b9eb9..6bbdd633864c 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -62,6 +62,7 @@ void __init time_init_hook(void) char *__init machine_specific_memory_setup(void) { char *who; + int new_nr; who = "NOT VOYAGER"; @@ -73,7 +74,7 @@ char *__init machine_specific_memory_setup(void) e820.nr_map = 0; for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { - add_memory_region(addr, length, E820_RAM); + e820_add_region(addr, length, E820_RAM); } return who; } else if (voyager_level == 4) { @@ -91,43 +92,17 @@ char *__init machine_specific_memory_setup(void) tom = (boot_params.screen_info.ext_mem_k) << 10; } who = "Voyager-TOM"; - add_memory_region(0, 0x9f000, E820_RAM); + e820_add_region(0, 0x9f000, E820_RAM); /* map from 1M to top of memory */ - add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, + e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, E820_RAM); /* FIXME: Should check the ASICs to see if I need to * take out the 8M window. Just do it at the moment * */ - add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024, + e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024, E820_RESERVED); return who; } - who = "BIOS-e820"; - - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); - if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) - < 0) { - unsigned long mem_size; - - /* compare results from other methods and take the greater */ - if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) { - mem_size = boot_params.screen_info.ext_mem_k; - who = "BIOS-88"; - } else { - mem_size = boot_params.alt_mem_k; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } - return who; + return default_machine_specific_memory_setup(); } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8acbf0cdf1a5..8dedd01e909f 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0; * activity count. Finally exported by i386_ksyms.c */ static int voyager_extended_cpus = 1; -/* Have we found an SMP box - used by time.c to do the profiling - interrupt for timeslicing; do not set to 1 until the per CPU timer - interrupt is active */ -int smp_found_config = 0; - /* Used for the invalidate map that's also checked in the spinlock */ static volatile unsigned long smp_invalidate_needed; @@ -1137,15 +1132,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, 0, 1, 1); } -/* used to set up the trampoline for other CPUs when the memory manager - * is sorted out */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - if (__pa(trampoline_base) >= 0x93000) - BUG(); -} - /* send a reschedule CPI to one CPU by physical CPU number*/ static void voyager_smp_send_reschedule(int cpu) { diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c index 04869e64b18e..00548354912f 100644 --- a/arch/x86/math-emu/reg_constant.c +++ b/arch/x86/math-emu/reg_constant.c @@ -16,8 +16,8 @@ #include "reg_constant.h" #include "control_w.h" -#define MAKE_REG(s,e,l,h) { l, h, \ - ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } +#define MAKE_REG(s, e, l, h) { l, h, \ + ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); #if 0 @@ -40,7 +40,7 @@ FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); /* Only the sign and significand (and tag) are used in internal NaNs */ -/* The 80486 never generates one of these +/* The 80486 never generates one of these FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); */ /* This is the real indefinite QNaN */ @@ -49,7 +49,7 @@ FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); /* Only the sign (and tag) is used in internal infinities */ FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); -static void fld_const(FPU_REG const *c, int adj, u_char tag) +static void fld_const(FPU_REG const * c, int adj, u_char tag) { FPU_REG *st_new_ptr; diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index b7b3e4c7cfc9..c107641cd39b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -13,5 +13,6 @@ obj-$(CONFIG_NUMA) += discontig_32.o else obj-$(CONFIG_NUMA) += numa_64.o obj-$(CONFIG_K8_NUMA) += k8topology_64.o -obj-$(CONFIG_ACPI_NUMA) += srat_64.o endif +obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o + diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 914ccf983687..5dfef9fa061a 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -38,6 +38,7 @@ #include <asm/setup.h> #include <asm/mmzone.h> #include <asm/bios_ebda.h> +#include <asm/proto.h> struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; /* * 4) physnode_map - the mapping between a pfn and owning node * physnode_map keeps track of the physical memory layout of a generic - * numa node on a 256Mb break (each element of the array will - * represent 256Mb of memory and will be marked by the node id. so, + * numa node on a 64Mb break (each element of the array will + * represent 64Mb of memory and will be marked by the node id. so, * if the first gig is on node 0, and the second gig is on node 1 * physnode_map will contain: * - * physnode_map[0-3] = 0; - * physnode_map[4-7] = 1; - * physnode_map[8- ] = -1; + * physnode_map[0-15] = 0; + * physnode_map[16-31] = 1; + * physnode_map[32- ] = -1; */ s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; EXPORT_SYMBOL(physnode_map); @@ -75,15 +76,15 @@ void memory_present(int nid, unsigned long start, unsigned long end) { unsigned long pfn; - printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n", + printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n", nid, start, end); printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); printk(KERN_DEBUG " "); for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { physnode_map[pfn / PAGES_PER_ELEMENT] = nid; - printk("%ld ", pfn); + printk(KERN_CONT "%lx ", pfn); } - printk("\n"); + printk(KERN_CONT "\n"); } unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, @@ -99,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, #endif extern unsigned long find_max_low_pfn(void); -extern void add_one_highpage_init(struct page *, int, int); extern unsigned long highend_pfn, highstart_pfn; #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) @@ -117,13 +117,13 @@ static unsigned long kva_pages; */ int __init get_memcfg_numa_flat(void) { - printk("NUMA - single node, flat memory mode\n"); + printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); - /* Run the memory configuration and find the top of memory. */ - propagate_e820_map(); node_start_pfn[0] = 0; node_end_pfn[0] = max_pfn; + e820_register_active_regions(0, 0, max_pfn); memory_present(0, 0, max_pfn); + node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); /* Indicate there is one node available. */ nodes_clear(node_online_map); @@ -156,24 +156,32 @@ static void __init propagate_e820_map_node(int nid) */ static void __init allocate_pgdat(int nid) { - if (nid && node_has_online_mem(nid)) + char buf[16]; + + if (node_has_online_mem(nid) && node_remap_start_vaddr[nid]) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { - NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); - min_low_pfn += PFN_UP(sizeof(pg_data_t)); + unsigned long pgdat_phys; + pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, + max_pfn_mapped<<PAGE_SHIFT, + sizeof(pg_data_t), + PAGE_SIZE); + NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); + memset(buf, 0, sizeof(buf)); + sprintf(buf, "NODE_DATA %d", nid); + reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); } + printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", + nid, (unsigned long)NODE_DATA(nid)); } -#ifdef CONFIG_DISCONTIGMEM /* - * In the discontig memory model, a portion of the kernel virtual area (KVA) - * is reserved and portions of nodes are mapped using it. This is to allow - * node-local memory to be allocated for structures that would normally require - * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers - * should be prepared to allocate from the bootmem allocator instead. This KVA - * mechanism is incompatible with SPARSEMEM as it makes assumptions about the - * layout of memory that are broken if alloc_remap() succeeds for some of the - * map and fails for others + * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel + * virtual address space (KVA) is reserved and portions of nodes are mapped + * using it. This is to allow node-local memory to be allocated for + * structures that would normally require ZONE_NORMAL. The memory is + * allocated with alloc_remap() and callers should be prepared to allocate + * from the bootmem allocator instead. */ static unsigned long node_remap_start_pfn[MAX_NUMNODES]; static void *node_remap_end_vaddr[MAX_NUMNODES]; @@ -195,15 +203,19 @@ void *alloc_remap(int nid, unsigned long size) return allocation; } -void __init remap_numa_kva(void) +static void __init remap_numa_kva(void) { void *vaddr; unsigned long pfn; int node; for_each_online_node(node) { + printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); + printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", + (unsigned long)vaddr, + node_remap_start_pfn[node] + pfn); set_pmd_pfn((ulong) vaddr, node_remap_start_pfn[node] + pfn, PAGE_KERNEL_LARGE); @@ -215,17 +227,21 @@ static unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; - unsigned long pfn; for_each_online_node(nid) { - unsigned old_end_pfn = node_end_pfn[nid]; + u64 node_kva_target; + u64 node_kva_final; /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. */ + printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", + nid, node_start_pfn[nid], node_end_pfn[nid]); if (node_start_pfn[nid] > max_pfn) continue; + if (!node_end_pfn[nid]) + continue; if (node_end_pfn[nid] > max_pfn) node_end_pfn[nid] = max_pfn; @@ -237,41 +253,48 @@ static unsigned long calculate_numa_remap_pages(void) /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; - /* - * Validate the region we are allocating only contains valid - * pages. - */ - for (pfn = node_end_pfn[nid] - size; - pfn < node_end_pfn[nid]; pfn++) - if (!page_is_ram(pfn)) - break; - - if (pfn != node_end_pfn[nid]) - size = 0; + node_kva_target = round_down(node_end_pfn[nid] - size, + PTRS_PER_PTE); + node_kva_target <<= PAGE_SHIFT; + do { + node_kva_final = find_e820_area(node_kva_target, + ((u64)node_end_pfn[nid])<<PAGE_SHIFT, + ((u64)size)<<PAGE_SHIFT, + LARGE_PAGE_BYTES); + node_kva_target -= LARGE_PAGE_BYTES; + } while (node_kva_final == -1ULL && + (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); + + if (node_kva_final == -1ULL) + panic("Can not get kva ram\n"); - printk("Reserving %ld pages of KVA for lmem_map of node %d\n", - size, nid); node_remap_size[nid] = size; node_remap_offset[nid] = reserve_pages; reserve_pages += size; - printk("Shrinking node %d from %ld pages to %ld pages\n", - nid, node_end_pfn[nid], node_end_pfn[nid] - size); - - if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { - /* - * Align node_end_pfn[] and node_remap_start_pfn[] to - * pmd boundary. remap_numa_kva will barf otherwise. - */ - printk("Shrinking node %d further by %ld pages for proper alignment\n", - nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); - size += node_end_pfn[nid] & (PTRS_PER_PTE-1); - } + printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" + " node %d at %llx\n", + size, nid, node_kva_final>>PAGE_SHIFT); + + /* + * prevent kva address below max_low_pfn want it on system + * with less memory later. + * layout will be: KVA address , KVA RAM + * + * we are supposed to only record the one less then max_low_pfn + * but we could have some hole in high memory, and it will only + * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide + * to use it as free. + * So reserve_early here, hope we don't run out of that array + */ + reserve_early(node_kva_final, + node_kva_final+(((u64)size)<<PAGE_SHIFT), + "KVA RAM"); - node_end_pfn[nid] -= size; - node_remap_start_pfn[nid] = node_end_pfn[nid]; - shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); + node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; + remove_active_range(nid, node_remap_start_pfn[nid], + node_remap_start_pfn[nid] + size); } - printk("Reserving total of %ld pages for numa KVA remap\n", + printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", reserve_pages); return reserve_pages; } @@ -285,37 +308,16 @@ static void init_remap_allocator(int nid) node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + ALIGN(sizeof(pg_data_t), PAGE_SIZE); - printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, + printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn - + node_remap_offset[nid] + node_remap_size[nid])); -} -#else -void *alloc_remap(int nid, unsigned long size) -{ - return NULL; -} - -static unsigned long calculate_numa_remap_pages(void) -{ - return 0; -} - -static void init_remap_allocator(int nid) -{ -} - -void __init remap_numa_kva(void) -{ + (ulong) node_remap_end_vaddr[nid]); } -#endif /* CONFIG_DISCONTIGMEM */ -extern void setup_bootmem_allocator(void); -unsigned long __init setup_memory(void) +void __init initmem_init(unsigned long start_pfn, + unsigned long end_pfn) { int nid; - unsigned long system_start_pfn, system_max_low_pfn; - unsigned long wasted_pages; + long kva_target_pfn; /* * When mapping a NUMA machine we allocate the node_mem_map arrays @@ -324,109 +326,77 @@ unsigned long __init setup_memory(void) * this space and use it to adjust the boundary between ZONE_NORMAL * and ZONE_HIGHMEM. */ - get_memcfg_numa(); - kva_pages = calculate_numa_remap_pages(); + get_memcfg_numa(); - /* partially used pages are not usable - thus round upwards */ - system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); + kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); - kva_start_pfn = find_max_low_pfn() - kva_pages; + kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); + do { + kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, + max_low_pfn<<PAGE_SHIFT, + kva_pages<<PAGE_SHIFT, + PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; + kva_target_pfn -= PTRS_PER_PTE; + } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); -#ifdef CONFIG_BLK_DEV_INITRD - /* Numa kva area is below the initrd */ - if (initrd_start) - kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET) - - kva_pages; -#endif + if (kva_start_pfn == -1UL) + panic("Can not get kva space\n"); - /* - * We waste pages past at the end of the KVA for no good reason other - * than how it is located. This is bad. - */ - wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1); - kva_start_pfn -= wasted_pages; - kva_pages += wasted_pages; - - system_max_low_pfn = max_low_pfn = find_max_low_pfn(); - printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", + printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", kva_start_pfn, max_low_pfn); - printk("max_pfn = %ld\n", max_pfn); + printk(KERN_INFO "max_pfn = %lx\n", max_pfn); + + /* avoid clash with initrd */ + reserve_early(kva_start_pfn<<PAGE_SHIFT, + (kva_start_pfn + kva_pages)<<PAGE_SHIFT, + "KVA PG"); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > system_max_low_pfn) - highstart_pfn = system_max_low_pfn; + if (max_pfn > max_low_pfn) + highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - num_physpages = system_max_low_pfn; - high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1; + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(system_max_low_pfn)); - printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", - min_low_pfn, max_low_pfn, highstart_pfn); + pages_to_mb(max_low_pfn)); + printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n", + max_low_pfn, highstart_pfn); - printk("Low memory ends at vaddr %08lx\n", + printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { init_remap_allocator(nid); allocate_pgdat(nid); } - printk("High memory starts at vaddr %08lx\n", + remap_numa_kva(); + + printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); for_each_online_node(nid) propagate_e820_map_node(nid); - memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); + for_each_online_node(nid) + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + NODE_DATA(0)->bdata = &node0_bdata; setup_bootmem_allocator(); - return max_low_pfn; -} - -void __init numa_kva_reserve(void) -{ - if (kva_pages) - reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages), - BOOTMEM_DEFAULT); } -void __init zone_sizes_init(void) -{ - int nid; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; -#ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; -#endif - - /* If SRAT has not registered memory, register it now */ - if (find_max_pfn_with_active_regions() == 0) { - for_each_online_node(nid) { - if (node_has_online_mem(nid)) - add_active_range(nid, node_start_pfn[nid], - node_end_pfn[nid]); - } - } - - free_area_init_nodes(max_zone_pfns); - return; -} - -void __init set_highmem_pages_init(int bad_ppro) +void __init set_highmem_pages_init(void) { #ifdef CONFIG_HIGHMEM struct zone *zone; - struct page *page; + int nid; for_each_zone(zone) { - unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + unsigned long zone_start_pfn, zone_end_pfn; if (!is_highmem(zone)) continue; @@ -434,16 +404,12 @@ void __init set_highmem_pages_init(int bad_ppro) zone_start_pfn = zone->zone_start_pfn; zone_end_pfn = zone_start_pfn + zone->spanned_pages; - printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone_to_nid(zone), - zone_start_pfn, zone_end_pfn); + nid = zone_to_nid(zone); + printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, nid, zone_start_pfn, zone_end_pfn); - for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { - if (!pfn_valid(node_pfn)) - continue; - page = pfn_to_page(node_pfn); - add_one_highpage_init(page, node_pfn, bad_ppro); - } + add_highpages_with_active_regions(nid, zone_start_pfn, + zone_end_pfn); } totalram_pages += totalhigh_pages; #endif @@ -476,3 +442,4 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif + diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2c24bea92c66..0bb0caed8971 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -42,7 +42,7 @@ static struct addr_marker address_markers[] = { { 0, "User Space" }, #ifdef CONFIG_X86_64 { 0x8000000000000000UL, "Kernel Space" }, - { 0xffff810000000000UL, "Low Kernel Mapping" }, + { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, { __START_KERNEL_map, "High Kernel Mapping" }, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8bcb6f40ccb6..d0f5fce77d95 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -55,11 +55,7 @@ static inline int notify_page_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ -#ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { -#else - if (!user_mode(regs)) { -#endif preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -396,11 +392,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(KERN_CONT "NULL pointer dereference"); else printk(KERN_CONT "paging request"); -#ifdef CONFIG_X86_32 - printk(KERN_CONT " at %08lx\n", address); -#else - printk(KERN_CONT " at %016lx\n", address); -#endif + printk(KERN_CONT " at %p\n", (void *) address); printk(KERN_ALERT "IP:"); printk_address(regs->ip, 1); dump_pagetable(address); @@ -800,14 +792,10 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( -#ifdef CONFIG_X86_32 - "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", -#else - "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", -#endif + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, regs->ip, - regs->sp, error_code); + tsk->comm, task_pid_nr(tsk), address, + (void *) regs->ip, (void *) regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); } @@ -915,14 +903,7 @@ LIST_HEAD(pgd_list); void vmalloc_sync_all(void) { #ifdef CONFIG_X86_32 - /* - * Note that races in the updates of insync and start aren't - * problematic: insync can only get set bits added, and updates to - * start are only improving performance (without affecting correctness - * if undone). - */ - static DECLARE_BITMAP(insync, PTRS_PER_PGD); - static unsigned long start = TASK_SIZE; + unsigned long start = VMALLOC_START & PGDIR_MASK; unsigned long address; if (SHARED_KERNEL_PMD) @@ -930,56 +911,38 @@ void vmalloc_sync_all(void) BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { - if (!test_bit(pgd_index(address), insync)) { - unsigned long flags; - struct page *page; - - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { - if (!vmalloc_sync_one(page_address(page), - address)) - break; - } - spin_unlock_irqrestore(&pgd_lock, flags); - if (!page) - set_bit(pgd_index(address), insync); + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + if (!vmalloc_sync_one(page_address(page), + address)) + break; } - if (address == start && test_bit(pgd_index(address), insync)) - start = address + PGDIR_SIZE; + spin_unlock_irqrestore(&pgd_lock, flags); } #else /* CONFIG_X86_64 */ - /* - * Note that races in the updates of insync and start aren't - * problematic: insync can only get set bits added, and updates to - * start are only improving performance (without affecting correctness - * if undone). - */ - static DECLARE_BITMAP(insync, PTRS_PER_PGD); - static unsigned long start = VMALLOC_START & PGDIR_MASK; + unsigned long start = VMALLOC_START & PGDIR_MASK; unsigned long address; for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { - if (!test_bit(pgd_index(address), insync)) { - const pgd_t *pgd_ref = pgd_offset_k(address); - unsigned long flags; - struct page *page; - - if (pgd_none(*pgd_ref)) - continue; - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; - pgd = (pgd_t *)page_address(page) + pgd_index(address); - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); - } - spin_unlock_irqrestore(&pgd_lock, flags); - set_bit(pgd_index(address), insync); + const pgd_t *pgd_ref = pgd_offset_k(address); + unsigned long flags; + struct page *page; + + if (pgd_none(*pgd_ref)) + continue; + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); } - if (address == start) - start = address + PGDIR_SIZE; + spin_unlock_irqrestore(&pgd_lock, flags); } #endif } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b6..b5a0fd5f4c5f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -57,6 +57,27 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); + +static unsigned long __initdata table_start; +static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; + +static int __initdata after_init_bootmem; + +static __init void *alloc_low_page(unsigned long *phys) +{ + unsigned long pfn = table_end++; + void *adr; + + if (pfn >= table_top) + panic("alloc_low_page: ran out of memory"); + + adr = __va(pfn * PAGE_SIZE); + memset(adr, 0, PAGE_SIZE); + *phys = pfn * PAGE_SIZE; + return adr; +} + /* * Creates a middle page table and puts a pointer to it in the * given global directory entry. This only returns the gd entry @@ -68,9 +89,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) pmd_t *pmd_table; #ifdef CONFIG_X86_PAE + unsigned long phys; if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - + if (after_init_bootmem) + pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + else + pmd_table = (pmd_t *)alloc_low_page(&phys); paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); pud = pud_offset(pgd, 0); @@ -92,12 +116,16 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = NULL; + if (after_init_bootmem) { #ifdef CONFIG_DEBUG_PAGEALLOC - page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif - if (!page_table) { - page_table = + if (!page_table) + page_table = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + } else { + unsigned long phys; + page_table = (pte_t *)alloc_low_page(&phys); } paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); @@ -155,38 +183,44 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) +static void __init kernel_physical_mapping_init(pgd_t *pgd_base, + unsigned long start_pfn, + unsigned long end_pfn, + int use_pse) { int pgd_idx, pmd_idx, pte_ofs; unsigned long pfn; pgd_t *pgd; pmd_t *pmd; pte_t *pte; + unsigned pages_2m = 0, pages_4k = 0; - pgd_idx = pgd_index(PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = 0; + if (!cpu_has_pse) + use_pse = 0; + pfn = start_pfn; + pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); + pgd = pgd_base + pgd_idx; for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) - continue; - for (pmd_idx = 0; - pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; + if (pfn >= end_pfn) + continue; +#ifdef CONFIG_X86_PAE + pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); + pmd += pmd_idx; +#else + pmd_idx = 0; +#endif + for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn; pmd++, pmd_idx++) { unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; /* * Map with big pages if possible, otherwise * create normal page tables: - * - * Don't use a large page for the first 2/4MB of memory - * because there are often fixed size MTRRs in there - * and overlapping MTRRs into large pages can cause - * slowdowns. */ - if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) { + if (use_pse) { unsigned int addr2; pgprot_t prot = PAGE_KERNEL_LARGE; @@ -197,34 +231,30 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) is_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; + pages_2m++; set_pmd(pmd, pfn_pmd(pfn, prot)); pfn += PTRS_PER_PTE; - max_pfn_mapped = pfn; continue; } pte = one_page_table_init(pmd); - for (pte_ofs = 0; - pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; + pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); + pte += pte_ofs; + for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn; pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { pgprot_t prot = PAGE_KERNEL; if (is_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; + pages_4k++; set_pte(pte, pfn_pte(pfn, prot)); } - max_pfn_mapped = pfn; } } -} - -static inline int page_kills_ppro(unsigned long pagenr) -{ - if (pagenr >= 0x70000 && pagenr <= 0x7003F) - return 1; - return 0; + update_page_count(PG_LEVEL_2M, pages_2m); + update_page_count(PG_LEVEL_4K, pages_4k); } /* @@ -287,29 +317,62 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) +static void __init add_one_highpage_init(struct page *page, int pfn) { - if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; - } else - SetPageReserved(page); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + totalhigh_pages++; } -#ifndef CONFIG_NUMA -static void __init set_highmem_pages_init(int bad_ppro) +struct add_highpages_data { + unsigned long start_pfn; + unsigned long end_pfn; +}; + +static int __init add_highpages_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) { - int pfn; + int node_pfn; + struct page *page; + unsigned long final_start_pfn, final_end_pfn; + struct add_highpages_data *data; - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { - /* - * Holes under sparsemem might not have no mem_map[]: - */ - if (pfn_valid(pfn)) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + data = (struct add_highpages_data *)datax; + + final_start_pfn = max(start_pfn, data->start_pfn); + final_end_pfn = min(end_pfn, data->end_pfn); + if (final_start_pfn >= final_end_pfn) + return 0; + + for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; + node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + add_one_highpage_init(page, node_pfn); } + + return 0; + +} + +void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ + struct add_highpages_data data; + + data.start_pfn = start_pfn; + data.end_pfn = end_pfn; + + work_with_active_regions(nid, add_highpages_work_fn, &data); +} + +#ifndef CONFIG_NUMA +static void __init set_highmem_pages_init(void) +{ + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); + totalram_pages += totalhigh_pages; } #endif /* !CONFIG_NUMA */ @@ -317,14 +380,9 @@ static void __init set_highmem_pages_init(int bad_ppro) #else # define kmap_init() do { } while (0) # define permanent_kmaps_init(pgd_base) do { } while (0) -# define set_highmem_pages_init(bad_ppro) do { } while (0) +# define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ -pteval_t __PAGE_KERNEL = _PAGE_KERNEL; -EXPORT_SYMBOL(__PAGE_KERNEL); - -pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; - void __init native_pagetable_setup_start(pgd_t *base) { unsigned long pfn, va; @@ -380,27 +438,10 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. */ -static void __init pagetable_init(void) +static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) { - pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; - paravirt_pagetable_setup_start(pgd_base); - - /* Enable PSE if available */ - if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); - - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; - __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; - } - - kernel_physical_mapping_init(pgd_base); - remap_numa_kva(); - /* * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): @@ -410,6 +451,13 @@ static void __init pagetable_init(void) end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; page_table_range_init(vaddr, end, pgd_base); early_ioremap_reset(); +} + +static void __init pagetable_init(void) +{ + pgd_t *pgd_base = swapper_pg_dir; + + paravirt_pagetable_setup_start(pgd_base); permanent_kmaps_init(pgd_base); @@ -456,7 +504,7 @@ void zap_low_mappings(void) int nx_enabled; -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; +pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); EXPORT_SYMBOL_GPL(__supported_pte_mask); #ifdef CONFIG_X86_PAE @@ -509,27 +557,318 @@ static void __init set_nx(void) } #endif +/* user-defined highmem size */ +static unsigned int highmem_pages = -1; + /* - * paging_init() sets up the page tables - note that the first 8MB are - * already mapped by head.S. - * - * This routines also unmaps the page at virtual kernel address 0, so - * that we can trap those pesky NULL-reference errors in the kernel. + * highmem=size forces highmem to be exactly 'size' bytes. + * This works even on boxes that have no highmem otherwise. + * This also works to reduce highmem size on bigger boxes. */ -void __init paging_init(void) +static int __init parse_highmem(char *arg) +{ + if (!arg) + return -EINVAL; + + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; + return 0; +} +early_param("highmem", parse_highmem); + +/* + * Determine low and high memory ranges: + */ +void __init find_low_pfn_range(void) { + /* it could update max_pfn */ + + /* max_low_pfn is 0, we already have early_res support */ + + max_low_pfn = max_pfn; + if (max_low_pfn > MAXMEM_PFN) { + if (highmem_pages == -1) + highmem_pages = max_pfn - MAXMEM_PFN; + if (highmem_pages + MAXMEM_PFN < max_pfn) + max_pfn = MAXMEM_PFN + highmem_pages; + if (highmem_pages + MAXMEM_PFN > max_pfn) { + printk(KERN_WARNING "only %luMB highmem pages " + "available, ignoring highmem size of %uMB.\n", + pages_to_mb(max_pfn - MAXMEM_PFN), + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } + max_low_pfn = MAXMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", + MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING + "Use a HIGHMEM64G enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); + max_pfn = MAXMEM_PFN; +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_HIGHMEM64G + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING "Warning only 4GB will be used." + "Use a HIGHMEM64G enabled kernel.\n"); + } +#endif /* !CONFIG_HIGHMEM64G */ +#endif /* !CONFIG_HIGHMEM */ + } else { + if (highmem_pages == -1) + highmem_pages = 0; +#ifdef CONFIG_HIGHMEM + if (highmem_pages >= max_pfn) { + printk(KERN_ERR "highmem size specified (%uMB) is " + "bigger than pages available (%luMB)!.\n", + pages_to_mb(highmem_pages), + pages_to_mb(max_pfn)); + highmem_pages = 0; + } + if (highmem_pages) { + if (max_low_pfn - highmem_pages < + 64*1024*1024/PAGE_SIZE){ + printk(KERN_ERR "highmem size %uMB results in " + "smaller than 64MB lowmem, ignoring it.\n" + , pages_to_mb(highmem_pages)); + highmem_pages = 0; + } + max_low_pfn -= highmem_pages; + } +#else + if (highmem_pages) + printk(KERN_ERR "ignoring highmem size on non-highmem" + " kernel!\n"); +#endif + } +} + +#ifndef CONFIG_NEED_MULTIPLE_NODES +void __init initmem_init(unsigned long start_pfn, + unsigned long end_pfn) +{ +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > max_low_pfn) + highstart_pfn = max_low_pfn; + memory_present(0, 0, highend_pfn); + e820_register_active_regions(0, 0, highend_pfn); + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + memory_present(0, 0, max_low_pfn); + e820_register_active_regions(0, 0, max_low_pfn); + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +#endif +#ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; +#endif + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(max_low_pfn)); + + setup_bootmem_allocator(); +} +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ + +static void __init zone_sizes_init(void) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; +#endif + + free_area_init_nodes(max_zone_pfns); +} + +void __init setup_bootmem_allocator(void) +{ + int i; + unsigned long bootmap_size, bootmap; + /* + * Initialize the boot-time allocator (with low memory only): + */ + bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; + bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, + max_pfn_mapped<<PAGE_SHIFT, bootmap_size, + PAGE_SIZE); + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n", bootmap_size); + reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); + + /* don't touch min_low_pfn */ + bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, + min_low_pfn, max_low_pfn); + printk(KERN_INFO " mapped low ram: 0 - %08lx\n", + max_pfn_mapped<<PAGE_SHIFT); + printk(KERN_INFO " low ram: %08lx - %08lx\n", + min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); + printk(KERN_INFO " bootmap %08lx - %08lx\n", + bootmap, bootmap + bootmap_size); + for_each_online_node(i) + free_bootmem_with_active_regions(i, max_low_pfn); + early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); + + after_init_bootmem = 1; +} + +static void __init find_early_table_space(unsigned long end) +{ + unsigned long puds, pmds, ptes, tables, start; + + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; + tables = PAGE_ALIGN(puds * sizeof(pud_t)); + + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); + + if (cpu_has_pse) { + unsigned long extra; + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + extra += PMD_SIZE; + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + + tables += PAGE_ALIGN(ptes * sizeof(pte_t)); + + /* for fixmap */ + tables += PAGE_SIZE * 2; + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. + */ + start = 0x7000; + table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, + tables, PAGE_SIZE); + if (table_start == -1UL) + panic("Cannot find space for the kernel page tables"); + + table_start >>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables>>PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, + (table_start << PAGE_SHIFT) + tables); +} + +unsigned long __init_refok init_memory_mapping(unsigned long start, + unsigned long end) +{ + pgd_t *pgd_base = swapper_pg_dir; + unsigned long start_pfn, end_pfn; + unsigned long big_page_start; + + /* + * Find space for the kernel direct mapping tables. + */ + if (!after_init_bootmem) + find_early_table_space(end); + #ifdef CONFIG_X86_PAE set_nx(); if (nx_enabled) printk(KERN_INFO "NX (Execute Disable) protection: active\n"); #endif - pagetable_init(); + + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } + + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. + */ + big_page_start = PMD_SIZE; + + if (start < big_page_start) { + start_pfn = start >> PAGE_SHIFT; + end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); + } else { + /* head is not big page alignment ? */ + start_pfn = start >> PAGE_SHIFT; + end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + } + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); + + /* big page range */ + start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < (big_page_start >> PAGE_SHIFT)) + start_pfn = big_page_start >> PAGE_SHIFT; + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, + cpu_has_pse); + + /* tail is not big page alignment ? */ + start_pfn = end_pfn; + if (start_pfn > (big_page_start>>PAGE_SHIFT)) { + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, + end_pfn, 0); + } + + early_ioremap_page_table_range_init(pgd_base); load_cr3(swapper_pg_dir); __flush_tlb_all(); + if (!after_init_bootmem) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + return end >> PAGE_SHIFT; +} + + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routines also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + pagetable_init(); + + __flush_tlb_all(); + kmap_init(); + + /* + * NOTE: at this point the bootmem allocator is fully available. + */ + sparse_init(); + zone_sizes_init(); + + paravirt_post_allocator_init(); } /* @@ -564,24 +903,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc; void __init mem_init(void) { int codesize, reservedpages, datasize, initsize; - int tmp, bad_ppro; + int tmp; #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif - bad_ppro = ppro_with_ram_bug(); - -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR - "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, - FIXADDR_START); - BUG(); - } -#endif /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); @@ -593,7 +919,7 @@ void __init mem_init(void) if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) reservedpages++; - set_highmem_pages_init(bad_ppro); + set_highmem_pages_init(); codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; @@ -614,7 +940,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#if 1 /* double-sanity-check paranoia */ printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM @@ -655,7 +980,6 @@ void __init mem_init(void) #endif BUG_ON(VMALLOC_START > VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); -#endif /* double-sanity-check paranoia */ if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -784,3 +1108,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) free_init_pages("initrd memory", start, end); } #endif + +int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) +{ + return reserve_bootmem(phys, len, flags); +} diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 156e6d7b0e32..48548ef7ddf8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -18,6 +18,7 @@ #include <linux/swap.h> #include <linux/smp.h> #include <linux/init.h> +#include <linux/initrd.h> #include <linux/pagemap.h> #include <linux/bootmem.h> #include <linux/proc_fs.h> @@ -47,6 +48,13 @@ #include <asm/numa.h> #include <asm/cacheflush.h> +/* + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. + * The direct mapping extends to max_pfn_mapped, so that we can directly access + * apertures, ACPI and other tables without having to play with fixmaps. + */ +unsigned long max_pfn_mapped; + static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -135,26 +143,17 @@ static __init void *spp_getpage(void) return ptr; } -static void -set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) +void +set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) { - pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, new_pte; - - pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys); + pte_t *pte; - pgd = pgd_offset_k(vaddr); - if (pgd_none(*pgd)) { - printk(KERN_ERR - "PGD FIXMAP MISSING, it should be setup in head.S!\n"); - return; - } - pud = pud_offset(pgd, vaddr); + pud = pud_page + pud_index(vaddr); if (pud_none(*pud)) { pmd = (pmd_t *) spp_getpage(); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); + pud_populate(&init_mm, pud, pmd); if (pmd != pmd_offset(pud, 0)) { printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0)); @@ -164,13 +163,12 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) pmd = pmd_offset(pud, vaddr); if (pmd_none(*pmd)) { pte = (pte_t *) spp_getpage(); - set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); + pmd_populate_kernel(&init_mm, pmd, pte); if (pte != pte_offset_kernel(pmd, 0)) { printk(KERN_ERR "PAGETABLE BUG #02!\n"); return; } } - new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); pte = pte_offset_kernel(pmd, vaddr); if (!pte_none(*pte) && pte_val(new_pte) && @@ -185,6 +183,64 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) __flush_tlb_one(vaddr); } +void +set_pte_vaddr(unsigned long vaddr, pte_t pteval) +{ + pgd_t *pgd; + pud_t *pud_page; + + pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval)); + + pgd = pgd_offset_k(vaddr); + if (pgd_none(*pgd)) { + printk(KERN_ERR + "PGD FIXMAP MISSING, it should be setup in head.S!\n"); + return; + } + pud_page = (pud_t*)pgd_page_vaddr(*pgd); + set_pte_vaddr_pud(pud_page, vaddr, pteval); +} + +/* + * Create large page table mappings for a range of physical addresses. + */ +static void __init __init_extra_mapping(unsigned long phys, unsigned long size, + pgprot_t prot) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK)); + for (; size; phys += PMD_SIZE, size -= PMD_SIZE) { + pgd = pgd_offset_k((unsigned long)__va(phys)); + if (pgd_none(*pgd)) { + pud = (pud_t *) spp_getpage(); + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | + _PAGE_USER)); + } + pud = pud_offset(pgd, (unsigned long)__va(phys)); + if (pud_none(*pud)) { + pmd = (pmd_t *) spp_getpage(); + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | + _PAGE_USER)); + } + pmd = pmd_offset(pud, phys); + BUG_ON(!pmd_none(*pmd)); + set_pmd(pmd, __pmd(phys | pgprot_val(prot))); + } +} + +void __init init_extra_mapping_wb(unsigned long phys, unsigned long size) +{ + __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE); +} + +void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) +{ + __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE); +} + /* * The head.S code sets up the kernel high mapping: * @@ -213,20 +269,9 @@ void __init cleanup_highmap(void) } } -/* NOTE: this is meant to be run only at boot */ -void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - printk(KERN_ERR "Invalid __set_fixmap\n"); - return; - } - set_pte_phys(address, phys, prot); -} - static unsigned long __initdata table_start; static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; static __meminit void *alloc_low_page(unsigned long *phys) { @@ -240,7 +285,7 @@ static __meminit void *alloc_low_page(unsigned long *phys) return adr; } - if (pfn >= end_pfn) + if (pfn >= table_top) panic("alloc_low_page: ran out of memory"); adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); @@ -257,65 +302,61 @@ static __meminit void unmap_low_page(void *adr) early_iounmap(adr, PAGE_SIZE); } -/* Must run before zap_low_mappings */ -__meminit void *early_ioremap(unsigned long addr, unsigned long size) +static unsigned long __meminit +phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) { - pmd_t *pmd, *last_pmd; - unsigned long vaddr; - int i, pmds; + unsigned pages = 0; + unsigned long last_map_addr = end; + int i; + + pte_t *pte = pte_page + pte_index(addr); - pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; - vaddr = __START_KERNEL_map; - pmd = level2_kernel_pgt; - last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; + for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { - for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { - for (i = 0; i < pmds; i++) { - if (pmd_present(pmd[i])) - goto continue_outer_loop; + if (addr >= end) { + if (!after_bootmem) { + for(; i < PTRS_PER_PTE; i++, pte++) + set_pte(pte, __pte(0)); + } + break; } - vaddr += addr & ~PMD_MASK; - addr &= PMD_MASK; - for (i = 0; i < pmds; i++, addr += PMD_SIZE) - set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); - __flush_tlb_all(); + if (pte_val(*pte)) + continue; - return (void *)vaddr; -continue_outer_loop: - ; + if (0) + printk(" pte=%p addr=%lx pte=%016lx\n", + pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); + set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); + last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; + pages++; } - printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size); + update_page_count(PG_LEVEL_4K, pages); - return NULL; + return last_map_addr; } -/* - * To avoid virtual aliases later: - */ -__meminit void early_iounmap(void *addr, unsigned long size) +static unsigned long __meminit +phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) { - unsigned long vaddr; - pmd_t *pmd; - int i, pmds; - - vaddr = (unsigned long)addr; - pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; - pmd = level2_kernel_pgt + pmd_index(vaddr); - - for (i = 0; i < pmds; i++) - pmd_clear(pmd + i); + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); - __flush_tlb_all(); + return phys_pte_init(pte, address, end); } static unsigned long __meminit -phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, + unsigned long page_size_mask) { + unsigned long pages = 0; + unsigned long last_map_addr = end; + int i = pmd_index(address); for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { + unsigned long pte_phys; pmd_t *pmd = pmd_page + pmd_index(address); + pte_t *pte; if (address >= end) { if (!after_bootmem) { @@ -325,31 +366,50 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) break; } - if (pmd_val(*pmd)) + if (pmd_val(*pmd)) { + if (!pmd_large(*pmd)) + last_map_addr = phys_pte_update(pmd, address, + end); continue; + } - set_pte((pte_t *)pmd, - pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + if (page_size_mask & (1<<PG_LEVEL_2M)) { + pages++; + set_pte((pte_t *)pmd, + pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + last_map_addr = (address & PMD_MASK) + PMD_SIZE; + continue; + } + + pte = alloc_low_page(&pte_phys); + last_map_addr = phys_pte_init(pte, address, end); + unmap_low_page(pte); + + pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); } - return address; + update_page_count(PG_LEVEL_2M, pages); + return last_map_addr; } static unsigned long __meminit -phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) +phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, + unsigned long page_size_mask) { pmd_t *pmd = pmd_offset(pud, 0); unsigned long last_map_addr; spin_lock(&init_mm.page_table_lock); - last_map_addr = phys_pmd_init(pmd, address, end); + last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); spin_unlock(&init_mm.page_table_lock); __flush_tlb_all(); return last_map_addr; } static unsigned long __meminit -phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) +phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, + unsigned long page_size_mask) { + unsigned long pages = 0; unsigned long last_map_addr = end; int i = pud_index(addr); @@ -369,11 +429,13 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) if (pud_val(*pud)) { if (!pud_large(*pud)) - last_map_addr = phys_pmd_update(pud, addr, end); + last_map_addr = phys_pmd_update(pud, addr, end, + page_size_mask); continue; } - if (direct_gbpages) { + if (page_size_mask & (1<<PG_LEVEL_1G)) { + pages++; set_pte((pte_t *)pud, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); last_map_addr = (addr & PUD_MASK) + PUD_SIZE; @@ -383,27 +445,50 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) pmd = alloc_low_page(&pmd_phys); spin_lock(&init_mm.page_table_lock); - set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); - last_map_addr = phys_pmd_init(pmd, addr, end); + last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); + unmap_low_page(pmd); + pud_populate(&init_mm, pud, __va(pmd_phys)); spin_unlock(&init_mm.page_table_lock); - unmap_low_page(pmd); } __flush_tlb_all(); + update_page_count(PG_LEVEL_1G, pages); - return last_map_addr >> PAGE_SHIFT; + return last_map_addr; +} + +static unsigned long __meminit +phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long page_size_mask) +{ + pud_t *pud; + + pud = (pud_t *)pgd_page_vaddr(*pgd); + + return phys_pud_init(pud, addr, end, page_size_mask); } static void __init find_early_table_space(unsigned long end) { - unsigned long puds, pmds, tables, start; + unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); - if (!direct_gbpages) { + if (direct_gbpages) { + unsigned long extra; + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); + pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); - } + tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (cpu_has_pse) { + unsigned long extra; + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); /* * RED-PEN putting page tables only on node 0 could @@ -417,10 +502,10 @@ static void __init find_early_table_space(unsigned long end) table_start >>= PAGE_SHIFT; table_end = table_start; + table_top = table_start + (tables >> PAGE_SHIFT); - early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, - (table_start << PAGE_SHIFT) + tables); + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); } static void __init init_gbpages(void) @@ -431,7 +516,7 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -#ifdef CONFIG_MEMTEST_BOOTPARAM +#ifdef CONFIG_MEMTEST static void __init memtest(unsigned long start_phys, unsigned long size, unsigned pattern) @@ -493,7 +578,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size, } -static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; +/* default is disabled */ +static int memtest_pattern __initdata; static int __init parse_memtest(char *arg) { @@ -526,7 +612,8 @@ static void __init early_memtest(unsigned long start, unsigned long end) t_size = end - t_start; printk(KERN_CONT "\n %016llx - %016llx pattern %d", - t_start, t_start + t_size, pattern); + (unsigned long long)t_start, + (unsigned long long)t_start + t_size, pattern); memtest(t_start, t_size, pattern); @@ -541,15 +628,85 @@ static void __init early_memtest(unsigned long start, unsigned long end) } #endif +static unsigned long __init kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask) +{ + + unsigned long next, last_map_addr = end; + + start = (unsigned long)__va(start); + end = (unsigned long)__va(end); + + for (; start < end; start = next) { + pgd_t *pgd = pgd_offset_k(start); + unsigned long pud_phys; + pud_t *pud; + + next = start + PGDIR_SIZE; + if (next > end) + next = end; + + if (pgd_val(*pgd)) { + last_map_addr = phys_pud_update(pgd, __pa(start), + __pa(end), page_size_mask); + continue; + } + + if (after_bootmem) + pud = pud_offset(pgd, start & PGDIR_MASK); + else + pud = alloc_low_page(&pud_phys); + + last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), + page_size_mask); + unmap_low_page(pud); + pgd_populate(&init_mm, pgd_offset_k(start), + __va(pud_phys)); + } + + return last_map_addr; +} + +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +#define NR_RANGE_MR 5 + +static int save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) +{ + + if (start_pfn < end_pfn) { + if (nr_range >= NR_RANGE_MR) + panic("run out of range for init_memory_mapping\n"); + mr[nr_range].start = start_pfn<<PAGE_SHIFT; + mr[nr_range].end = end_pfn<<PAGE_SHIFT; + mr[nr_range].page_size_mask = page_size_mask; + nr_range++; + } + + return nr_range; +} + /* * Setup the direct mapping of the physical memory at PAGE_OFFSET. * This runs before bootmem is initialized and gets pages directly from * the physical memory. To access them they are temporarily mapped. */ -unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end) +unsigned long __init_refok init_memory_mapping(unsigned long start, + unsigned long end) { - unsigned long next, last_map_addr = end; - unsigned long start_phys = start, end_phys = end; + unsigned long last_map_addr = 0; + unsigned long page_size_mask = 0; + unsigned long start_pfn, end_pfn; + + struct map_range mr[NR_RANGE_MR]; + int nr_range, i; printk(KERN_INFO "init_memory_mapping\n"); @@ -560,48 +717,101 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon * memory mapped. Unfortunately this is done currently before the * nodes are discovered. */ - if (!after_bootmem) { + if (!after_bootmem) init_gbpages(); - find_early_table_space(end); - } - start = (unsigned long)__va(start); - end = (unsigned long)__va(end); + if (direct_gbpages) + page_size_mask |= 1 << PG_LEVEL_1G; + if (cpu_has_pse) + page_size_mask |= 1 << PG_LEVEL_2M; + + memset(mr, 0, sizeof(mr)); + nr_range = 0; + + /* head if not big page alignment ?*/ + start_pfn = start >> PAGE_SHIFT; + end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + /* big page (2M) range*/ + start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)); + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<<PG_LEVEL_2M)); + + /* big page (1G) range */ + start_pfn = end_pfn; + end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); + + /* tail is not big page (1G) alignment */ + start_pfn = end_pfn; + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<<PG_LEVEL_2M)); + + /* tail is not big page (2M) alignment */ + start_pfn = end_pfn; + end_pfn = end>>PAGE_SHIFT; + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG " %010lx - %010lx page %s\n", + mr[i].start, mr[i].end, + (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( + (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); - for (; start < end; start = next) { - pgd_t *pgd = pgd_offset_k(start); - unsigned long pud_phys; - pud_t *pud; - - if (after_bootmem) - pud = pud_offset(pgd, start & PGDIR_MASK); - else - pud = alloc_low_page(&pud_phys); + if (!after_bootmem) + find_early_table_space(end); - next = start + PGDIR_SIZE; - if (next > end) - next = end; - last_map_addr = phys_pud_init(pud, __pa(start), __pa(next)); - if (!after_bootmem) - set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); - unmap_low_page(pud); - } + for (i = 0; i < nr_range; i++) + last_map_addr = kernel_physical_mapping_init( + mr[i].start, mr[i].end, + mr[i].page_size_mask); if (!after_bootmem) mmu_cr4_features = read_cr4(); __flush_tlb_all(); - if (!after_bootmem) + if (!after_bootmem && table_end > table_start) reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT, "PGTABLE"); + printk(KERN_INFO "last_map_addr: %lx end: %lx\n", + last_map_addr, end); + if (!after_bootmem) - early_memtest(start_phys, end_phys); + early_memtest(start, end); - return last_map_addr; + return last_map_addr >> PAGE_SHIFT; } #ifndef CONFIG_NUMA +void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) +{ + unsigned long bootmap_size, bootmap; + + bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; + bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, + PAGE_SIZE); + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n", bootmap_size); + /* don't touch min_low_pfn */ + bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, + 0, end_pfn); + e820_register_active_regions(0, start_pfn, end_pfn); + free_bootmem_with_active_regions(0, end_pfn); + early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); + reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); +} + void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -609,9 +819,9 @@ void __init paging_init(void) memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; - max_zone_pfns[ZONE_NORMAL] = end_pfn; + max_zone_pfns[ZONE_NORMAL] = max_pfn; - memory_present(0, 0, end_pfn); + memory_present(0, 0, max_pfn); sparse_init(); free_area_init_nodes(max_zone_pfns); } @@ -693,8 +903,8 @@ void __init mem_init(void) #else totalram_pages = free_all_bootmem(); #endif - reservedpages = end_pfn - totalram_pages - - absent_pages_in_range(0, end_pfn); + reservedpages = max_pfn - totalram_pages - + absent_pages_in_range(0, max_pfn); after_bootmem = 1; codesize = (unsigned long) &_etext - (unsigned long) &_text; @@ -713,7 +923,7 @@ void __init mem_init(void) printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " "%ldk reserved, %ldk data, %ldk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - end_pfn << (PAGE_SHIFT-10), + max_pfn << (PAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, @@ -798,24 +1008,26 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init reserve_bootmem_generic(unsigned long phys, unsigned len) +int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) { #ifdef CONFIG_NUMA int nid, next_nid; + int ret; #endif unsigned long pfn = phys >> PAGE_SHIFT; - if (pfn >= end_pfn) { + if (pfn >= max_pfn) { /* * This can happen with kdump kernels when accessing * firmware tables: */ if (pfn < max_pfn_mapped) - return; + return -EFAULT; - printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n", phys, len); - return; + return -EFAULT; } /* Should check here against the e820 map to avoid double free */ @@ -823,9 +1035,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) nid = phys_to_nid(phys); next_nid = phys_to_nid(phys + len - 1); if (nid == next_nid) - reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); + ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags); else - reserve_bootmem(phys, len, BOOTMEM_DEFAULT); + ret = reserve_bootmem(phys, len, flags); + + if (ret != 0) + return ret; + #else reserve_bootmem(phys, len, BOOTMEM_DEFAULT); #endif @@ -834,6 +1050,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) dma_reserve += len / PAGE_SIZE; set_dma_reserve(dma_reserve); } + + return 0; } int kern_addr_valid(unsigned long addr) @@ -938,7 +1156,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) pmd_t *pmd; for (; addr < end; addr = next) { - next = pmd_addr_end(addr, end); + void *p = NULL; pgd = vmemmap_pgd_populate(addr, node); if (!pgd) @@ -948,33 +1166,51 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) if (!pud) return -ENOMEM; - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - pte_t entry; - void *p; + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + pmd = vmemmap_pmd_populate(pud, addr, node); + + if (!pmd) + return -ENOMEM; + + p = vmemmap_pte_populate(pmd, addr, node); - p = vmemmap_alloc_block(PMD_SIZE, node); if (!p) return -ENOMEM; - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE); - set_pmd(pmd, __pmd(pte_val(entry))); - - /* check to see if we have contiguous blocks */ - if (p_end != p || node_start != node) { - if (p_start) - printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", - addr_start, addr_end-1, p_start, p_end-1, node_start); - addr_start = addr; - node_start = node; - p_start = p; - } - addr_end = addr + PMD_SIZE; - p_end = p + PMD_SIZE; + addr_end = addr + PAGE_SIZE; + p_end = p + PAGE_SIZE; } else { - vmemmap_verify((pte_t *)pmd, node, addr, next); + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t entry; + + p = vmemmap_alloc_block(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, + PAGE_KERNEL_LARGE); + set_pmd(pmd, __pmd(pte_val(entry))); + + /* check to see if we have contiguous blocks */ + if (p_end != p || node_start != node) { + if (p_start) + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", + addr_start, addr_end-1, p_start, p_end-1, node_start); + addr_start = addr; + node_start = node; + p_start = p; + } + + addr_end = addr + PMD_SIZE; + p_end = p + PMD_SIZE; + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); } + } return 0; } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2b2bb3f9b683..45e546c4ba78 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, /* * Don't remap the low PCI/ISA area, it's always mapped.. */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + if (is_ISA_range(phys_addr, last_addr)) return (__force void __iomem *)phys_to_virt(phys_addr); /* @@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; + * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use * UC MINUS. @@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache); */ void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) { - if (pat_wc_enabled) + if (pat_enabled) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, __builtin_return_address(0)); else @@ -318,8 +318,8 @@ void iounmap(volatile void __iomem *addr) * vm_area and by simply returning an address into the kernel mapping * of ISA space. So handle that here. */ - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) + if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && + (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) return; addr = (volatile void __iomem *) @@ -332,7 +332,7 @@ void iounmap(volatile void __iomem *addr) cpa takes care of the direct mappings. */ read_lock(&vmlist_lock); for (p = vmlist; p; p = p->next) { - if (p->addr == addr) + if (p->addr == (void __force *)addr) break; } read_unlock(&vmlist_lock); @@ -346,7 +346,7 @@ void iounmap(volatile void __iomem *addr) free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ - o = remove_vm_area((void *)addr); + o = remove_vm_area((void __force *)addr); BUG_ON(p != o || o == NULL); kfree(p); } @@ -365,7 +365,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void *)ioremap(start, PAGE_SIZE); + addr = (void __force *)ioremap(start, PAGE_SIZE); if (addr) addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); @@ -381,8 +381,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) return; } -#ifdef CONFIG_X86_32 - int __initdata early_ioremap_debug; static int __init early_ioremap_debug_setup(char *str) @@ -394,8 +392,7 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] - __section(.bss.page_aligned); +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { @@ -484,10 +481,11 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, return; } pte = early_ioremap_pte(addr); + if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else - pte_clear(NULL, addr, pte); + pte_clear(&init_mm, addr, pte); __flush_tlb_one(addr); } @@ -625,5 +623,3 @@ void __this_fixmap_does_not_exist(void) { WARN_ON(1); } - -#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 1f476e477844..41f1b5c00a1d 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -22,6 +22,7 @@ #include <asm/numa.h> #include <asm/mpspec.h> #include <asm/apic.h> +#include <asm/k8.h> static __init int find_northbridge(void) { @@ -56,34 +57,33 @@ static __init void early_get_boot_cpu_id(void) /* * Find possible boot-time SMP configuration: */ +#ifdef CONFIG_X86_MPPARSE early_find_smp_config(); +#endif #ifdef CONFIG_ACPI /* * Read APIC information from ACPI tables. */ early_acpi_boot_init(); #endif +#ifdef CONFIG_X86_MPPARSE /* * get boot-time SMP configuration: */ if (smp_found_config) early_get_smp_config(); +#endif early_init_lapic_mapping(); } int __init k8_scan_nodes(unsigned long start, unsigned long end) { + unsigned numnodes, cores, bits, apicid_base; unsigned long prevbase; struct bootnode nodes[8]; - int nodeid, i, nb; unsigned char nodeids[8]; - int found = 0; - u32 reg; - unsigned numnodes; - unsigned cores; - unsigned bits; - int j; - unsigned apicid_base; + int i, j, nb, found = 0; + u32 nodeid, reg; if (!early_pci_allowed()) return -1; @@ -105,7 +105,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) prevbase = 0; for (i = 0; i < 8; i++) { unsigned long base, limit; - u32 nodeid; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); @@ -144,8 +143,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) limit |= (1<<24)-1; limit++; - if (limit > end_pfn << PAGE_SHIFT) - limit = end_pfn << PAGE_SHIFT; + if (limit > max_pfn << PAGE_SHIFT) + limit = max_pfn << PAGE_SHIFT; if (limit <= base) continue; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d519e5d..b432d5781773 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -27,30 +27,17 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -bootmem_data_t plat_node_bdata[MAX_NUMNODES]; +static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct memnode memnode; -#ifdef CONFIG_SMP -int x86_cpu_to_node_map_init[NR_CPUS] = { - [0 ... NR_CPUS-1] = NUMA_NO_NODE -}; -void *x86_cpu_to_node_map_early_ptr; -EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr); -#endif -DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map); - s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_to_cpumask_map); - int numa_off __initdata; -unsigned long __initdata nodemap_addr; -unsigned long __initdata nodemap_size; +static unsigned long __initdata nodemap_addr; +static unsigned long __initdata nodemap_size; /* * Given a shift value, try to populate memnodemap[] @@ -99,7 +86,7 @@ static int __init allocate_cachealigned_memnodemap(void) addr = 0x8000; nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); - nodemap_addr = find_e820_area(addr, end_pfn<<PAGE_SHIFT, + nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT, nodemap_size, L1_CACHE_BYTES); if (nodemap_addr == -1UL) { printk(KERN_ERR @@ -192,7 +179,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start, void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) { - unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size; + unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; unsigned long bootmap_start, nodedata_phys; void *bootmap; const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); @@ -204,7 +191,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, start, end); start_pfn = start >> PAGE_SHIFT; - end_pfn = end >> PAGE_SHIFT; + last_pfn = end >> PAGE_SHIFT; node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, SMP_CACHE_BYTES); @@ -217,7 +204,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; /* * Find a place for the bootmem map @@ -226,14 +213,14 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, * early_node_mem will get that with find_e820_area instead * of alloc_bootmem, that could clash with reserved range */ - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); + bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); nid = phys_to_nid(nodedata_phys); if (nid == nodeid) bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); else bootmap_start = round_up(start, PAGE_SIZE); /* - * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like + * SMP_CACHE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE */ bootmap = early_node_mem(nodeid, bootmap_start, end, @@ -248,7 +235,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap_start >> PAGE_SHIFT, - start_pfn, end_pfn); + start_pfn, last_pfn); printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", bootmap_start, bootmap_start + bootmap_size - 1, @@ -309,7 +296,7 @@ void __init numa_init_array(void) #ifdef CONFIG_NUMA_EMU /* Numa emulation */ -char *cmdline __initdata; +static char *cmdline __initdata; /* * Setups up nid to range from addr to addr + size. If the end @@ -413,15 +400,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, } /* - * Sets up the system RAM area from start_pfn to end_pfn according to the + * Sets up the system RAM area from start_pfn to last_pfn according to the * numa=fake command-line option. */ static struct bootnode nodes[MAX_NUMNODES] __initdata; -static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) +static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) { u64 size, addr = start_pfn << PAGE_SHIFT; - u64 max_addr = end_pfn << PAGE_SHIFT; + u64 max_addr = last_pfn << PAGE_SHIFT; int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; memset(&nodes, 0, sizeof(nodes)); @@ -527,7 +514,7 @@ out: } #endif /* CONFIG_NUMA_EMU */ -void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) +void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) { int i; @@ -535,7 +522,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) nodes_clear(node_online_map); #ifdef CONFIG_NUMA_EMU - if (cmdline && !numa_emulation(start_pfn, end_pfn)) + if (cmdline && !numa_emulation(start_pfn, last_pfn)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -543,7 +530,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) #ifdef CONFIG_ACPI_NUMA if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, - end_pfn << PAGE_SHIFT)) + last_pfn << PAGE_SHIFT)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -551,7 +538,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) #ifdef CONFIG_K8_NUMA if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, - end_pfn<<PAGE_SHIFT)) + last_pfn<<PAGE_SHIFT)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -561,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) printk(KERN_INFO "Faking a node at %016lx-%016lx\n", start_pfn << PAGE_SHIFT, - end_pfn << PAGE_SHIFT); + last_pfn << PAGE_SHIFT); /* setup dummy node covering all memory */ memnode_shift = 63; memnodemap = memnode.embedded_map; @@ -570,29 +557,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) node_set(0, node_possible_map); for (i = 0; i < NR_CPUS; i++) numa_set_node(i, 0); - /* cpumask_of_cpu() may not be available during early startup */ - memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0])); - cpu_set(0, node_to_cpumask_map[0]); - e820_register_active_regions(0, start_pfn, end_pfn); - setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); -} - -__cpuinit void numa_add_cpu(int cpu) -{ - set_bit(cpu, - (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; - - if(cpu_to_node_map) - cpu_to_node_map[cpu] = node; - else if(per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; - else - Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); + e820_register_active_regions(0, start_pfn, last_pfn); + setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); } unsigned long __init numa_free_all_bootmem(void) @@ -613,7 +579,7 @@ void __init paging_init(void) memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; - max_zone_pfns[ZONE_NORMAL] = end_pfn; + max_zone_pfns[ZONE_NORMAL] = max_pfn; sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); @@ -641,6 +607,7 @@ static __init int numa_setup(char *opt) } early_param("numa", numa_setup); +#ifdef CONFIG_NUMA /* * Setup early cpu_to_node. * @@ -652,14 +619,19 @@ early_param("numa", numa_setup); * is already initialized in a round robin manner at numa_init_array, * prior to this call, and this initialization is good enough * for the fake NUMA cases. + * + * Called before the per_cpu areas are setup. */ void __init init_cpu_to_node(void) { - int i; + int cpu; + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - for (i = 0; i < NR_CPUS; i++) { + BUG_ON(cpu_to_apicid == NULL); + + for_each_possible_cpu(cpu) { int node; - u16 apicid = x86_cpu_to_apicid_init[i]; + u16 apicid = cpu_to_apicid[cpu]; if (apicid == BAD_APICID) continue; @@ -668,8 +640,9 @@ void __init init_cpu_to_node(void) continue; if (!node_online(node)) continue; - numa_set_node(i, node); + numa_set_node(cpu, node); } } +#endif diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 75f1b109aae8..0dcd42eb94e6 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -1,8 +1,8 @@ /* * self test for change_page_attr. * - * Clears the global bit on random pages in the direct mapping, then reverts - * and compares page tables forwards and afterwards. + * Clears the a test pte bit on random pages in the direct mapping, + * then reverts and compares page tables forwards and afterwards. */ #include <linux/bootmem.h> #include <linux/kthread.h> @@ -32,6 +32,13 @@ enum { GPS = (1<<30) }; +#define PAGE_TESTBIT __pgprot(_PAGE_UNUSED1) + +static int pte_testbit(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UNUSED1; +} + struct split_state { long lpg, gpg, spg, exec; long min_exec, max_exec; @@ -165,15 +172,14 @@ static int pageattr_test(void) continue; } - err = change_page_attr_clear(addr[i], len[i], - __pgprot(_PAGE_GLOBAL)); + err = change_page_attr_set(addr[i], len[i], PAGE_TESTBIT); if (err < 0) { printk(KERN_ERR "CPA %d failed %d\n", i, err); failed++; } pte = lookup_address(addr[i], &level); - if (!pte || pte_global(*pte) || pte_huge(*pte)) { + if (!pte || !pte_testbit(*pte) || pte_huge(*pte)) { printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i], pte ? (u64)pte_val(*pte) : 0ULL); failed++; @@ -198,14 +204,13 @@ static int pageattr_test(void) failed++; continue; } - err = change_page_attr_set(addr[i], len[i], - __pgprot(_PAGE_GLOBAL)); + err = change_page_attr_clear(addr[i], len[i], PAGE_TESTBIT); if (err < 0) { printk(KERN_ERR "CPA reverting failed: %d\n", err); failed++; } pte = lookup_address(addr[i], &level); - if (!pte || !pte_global(*pte)) { + if (!pte || pte_testbit(*pte)) { printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n", addr[i], pte ? (u64)pte_val(*pte) : 0ULL); failed++; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37e..afd40054d157 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -34,6 +34,41 @@ struct cpa_data { unsigned force_split : 1; }; +#ifdef CONFIG_PROC_FS +static unsigned long direct_pages_count[PG_LEVEL_NUM]; + +void update_page_count(int level, unsigned long pages) +{ + unsigned long flags; + + /* Protect against CPA */ + spin_lock_irqsave(&pgd_lock, flags); + direct_pages_count[level] += pages; + spin_unlock_irqrestore(&pgd_lock, flags); +} + +static void split_page_count(int level) +{ + direct_pages_count[level]--; + direct_pages_count[level - 1] += PTRS_PER_PTE; +} + +int arch_report_meminfo(char *page) +{ + int n = sprintf(page, "DirectMap4k: %8lu\n" + "DirectMap2M: %8lu\n", + direct_pages_count[PG_LEVEL_4K], + direct_pages_count[PG_LEVEL_2M]); +#ifdef CONFIG_X86_64 + n += sprintf(page + n, "DirectMap1G: %8lu\n", + direct_pages_count[PG_LEVEL_1G]); +#endif + return n; +} +#else +static inline void split_page_count(int level) { } +#endif + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -500,6 +535,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); + if (address >= (unsigned long)__va(0) && + address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) + split_page_count(level); + /* * Install the new, split up pagetable. Important details here: * @@ -805,7 +844,7 @@ int _set_memory_wc(unsigned long addr, int numpages) int set_memory_wc(unsigned long addr, int numpages) { - if (!pat_wc_enabled) + if (!pat_enabled) return set_memory_uc(addr, numpages); if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 06b7a1c90fb8..a885a1019b8a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -26,11 +26,11 @@ #include <asm/io.h> #ifdef CONFIG_X86_PAT -int __read_mostly pat_wc_enabled = 1; +int __read_mostly pat_enabled = 1; void __cpuinit pat_disable(char *reason) { - pat_wc_enabled = 0; + pat_enabled = 0; printk(KERN_INFO "%s\n", reason); } @@ -42,6 +42,19 @@ static int __init nopat(char *str) early_param("nopat", nopat); #endif + +static int debug_enable; +static int __init pat_debug_setup(char *str) +{ + debug_enable = 1; + return 0; +} +__setup("debugpat", pat_debug_setup); + +#define dprintk(fmt, arg...) \ + do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) + + static u64 __read_mostly boot_pat_state; enum { @@ -53,24 +66,25 @@ enum { PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ }; -#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8)) +#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) void pat_init(void) { u64 pat; - if (!pat_wc_enabled) + if (!pat_enabled) return; /* Paranoia check. */ - if (!cpu_has_pat) { - printk(KERN_ERR "PAT enabled, but CPU feature cleared\n"); + if (!cpu_has_pat && boot_pat_state) { /* - * Panic if this happens on the secondary CPU, and we + * If this happens we are on a secondary CPU, but * switched to PAT on the boot CPU. We have no way to * undo PAT. - */ - BUG_ON(boot_pat_state); + */ + printk(KERN_ERR "PAT enabled, " + "but not supported by secondary CPU\n"); + BUG(); } /* Set PWT to Write-Combining. All other bits stay the same */ @@ -86,8 +100,8 @@ void pat_init(void) * 011 UC _PAGE_CACHE_UC * PAT bit unused */ - pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) | - PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC); + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); /* Boot CPU check */ if (!boot_pat_state) @@ -103,11 +117,11 @@ void pat_init(void) static char *cattr_name(unsigned long flags) { switch (flags & _PAGE_CACHE_MASK) { - case _PAGE_CACHE_UC: return "uncached"; - case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; - case _PAGE_CACHE_WB: return "write-back"; - case _PAGE_CACHE_WC: return "write-combining"; - default: return "broken"; + case _PAGE_CACHE_UC: return "uncached"; + case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; + case _PAGE_CACHE_WB: return "write-back"; + case _PAGE_CACHE_WC: return "write-combining"; + default: return "broken"; } } @@ -145,47 +159,50 @@ static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ * The intersection is based on "Effective Memory Type" tables in IA-32 * SDM vol 3a */ -static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, - unsigned long *ret_prot) +static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) { - unsigned long pat_type; - u8 mtrr_type; - - pat_type = prot & _PAGE_CACHE_MASK; - prot &= (~_PAGE_CACHE_MASK); - - /* - * We return the PAT request directly for types where PAT takes - * precedence with respect to MTRR and for UC_MINUS. - * Consistency checks with other PAT requests is done later - * while going through memtype list. - */ - if (pat_type == _PAGE_CACHE_WC) { - *ret_prot = prot | _PAGE_CACHE_WC; - return 0; - } else if (pat_type == _PAGE_CACHE_UC_MINUS) { - *ret_prot = prot | _PAGE_CACHE_UC_MINUS; - return 0; - } else if (pat_type == _PAGE_CACHE_UC) { - *ret_prot = prot | _PAGE_CACHE_UC; - return 0; - } - /* * Look for MTRR hint to get the effective type in case where PAT * request is for WB. */ - mtrr_type = mtrr_type_lookup(start, end); + if (req_type == _PAGE_CACHE_WB) { + u8 mtrr_type; + + mtrr_type = mtrr_type_lookup(start, end); + if (mtrr_type == MTRR_TYPE_UNCACHABLE) + return _PAGE_CACHE_UC; + if (mtrr_type == MTRR_TYPE_WRCOMB) + return _PAGE_CACHE_WC; + } - if (mtrr_type == MTRR_TYPE_UNCACHABLE) { - *ret_prot = prot | _PAGE_CACHE_UC; - } else if (mtrr_type == MTRR_TYPE_WRCOMB) { - *ret_prot = prot | _PAGE_CACHE_WC; - } else { - *ret_prot = prot | _PAGE_CACHE_WB; + return req_type; +} + +static int chk_conflict(struct memtype *new, struct memtype *entry, + unsigned long *type) +{ + if (new->type != entry->type) { + if (type) { + new->type = entry->type; + *type = entry->type; + } else + goto conflict; } + /* check overlaps with more than one entry in the list */ + list_for_each_entry_continue(entry, &memtype_list, nd) { + if (new->end <= entry->start) + break; + else if (new->type != entry->type) + goto conflict; + } return 0; + + conflict: + printk(KERN_INFO "%s:%d conflicting memory types " + "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, + new->end, cattr_name(new->type), cattr_name(entry->type)); + return -EBUSY; } /* @@ -198,37 +215,36 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, * req_type will have a special case value '-1', when requester want to inherit * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. * - * If ret_type is NULL, function will return an error if it cannot reserve the - * region with req_type. If ret_type is non-null, function will return - * available type in ret_type in case of no error. In case of any error + * If new_type is NULL, function will return an error if it cannot reserve the + * region with req_type. If new_type is non-NULL, function will return + * available type in new_type in case of no error. In case of any error * it will return a negative return value. */ int reserve_memtype(u64 start, u64 end, unsigned long req_type, - unsigned long *ret_type) + unsigned long *new_type) { - struct memtype *new_entry = NULL; - struct memtype *parse; + struct memtype *new, *entry; unsigned long actual_type; + struct list_head *where; int err = 0; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + BUG_ON(start >= end); /* end is exclusive */ + + if (!pat_enabled) { /* This is identical to page table setting without PAT */ - if (ret_type) { - if (req_type == -1) { - *ret_type = _PAGE_CACHE_WB; - } else { - *ret_type = req_type; - } + if (new_type) { + if (req_type == -1) + *new_type = _PAGE_CACHE_WB; + else + *new_type = req_type & _PAGE_CACHE_MASK; } return 0; } /* Low ISA region is always mapped WB in page table. No need to track */ - if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) { - if (ret_type) - *ret_type = _PAGE_CACHE_WB; - + if (is_ISA_range(start, end - 1)) { + if (new_type) + *new_type = _PAGE_CACHE_WB; return 0; } @@ -241,206 +257,92 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, */ u8 mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == MTRR_TYPE_WRBACK) { - req_type = _PAGE_CACHE_WB; + if (mtrr_type == MTRR_TYPE_WRBACK) actual_type = _PAGE_CACHE_WB; - } else { - req_type = _PAGE_CACHE_UC_MINUS; + else actual_type = _PAGE_CACHE_UC_MINUS; - } - } else { - req_type &= _PAGE_CACHE_MASK; - err = pat_x_mtrr_type(start, end, req_type, &actual_type); - } - - if (err) { - if (ret_type) - *ret_type = actual_type; - - return -EINVAL; - } + } else + actual_type = pat_x_mtrr_type(start, end, + req_type & _PAGE_CACHE_MASK); - new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); - if (!new_entry) + new = kmalloc(sizeof(struct memtype), GFP_KERNEL); + if (!new) return -ENOMEM; - new_entry->start = start; - new_entry->end = end; - new_entry->type = actual_type; + new->start = start; + new->end = end; + new->type = actual_type; - if (ret_type) - *ret_type = actual_type; + if (new_type) + *new_type = actual_type; spin_lock(&memtype_lock); /* Search for existing mapping that overlaps the current range */ - list_for_each_entry(parse, &memtype_list, nd) { - struct memtype *saved_ptr; - - if (parse->start >= end) { - pr_debug("New Entry\n"); - list_add(&new_entry->nd, parse->nd.prev); - new_entry = NULL; + where = NULL; + list_for_each_entry(entry, &memtype_list, nd) { + if (end <= entry->start) { + where = entry->nd.prev; break; - } - - if (start <= parse->start && end >= parse->start) { - if (actual_type != parse->type && ret_type) { - actual_type = parse->type; - *ret_type = actual_type; - new_entry->type = actual_type; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; + } else if (start <= entry->start) { /* end > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + where = entry->nd.prev; } - - saved_ptr = parse; - /* - * Check to see whether the request overlaps more - * than one entry in the list - */ - list_for_each_entry_continue(parse, &memtype_list, nd) { - if (end <= parse->start) { - break; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - } - - if (err) { - break; - } - - pr_debug("Overlap at 0x%Lx-0x%Lx\n", - saved_ptr->start, saved_ptr->end); - /* No conflict. Go ahead and add this new entry */ - list_add(&new_entry->nd, saved_ptr->nd.prev); - new_entry = NULL; break; - } - - if (start < parse->end) { - if (actual_type != parse->type && ret_type) { - actual_type = parse->type; - *ret_type = actual_type; - new_entry->type = actual_type; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - - saved_ptr = parse; - /* - * Check to see whether the request overlaps more - * than one entry in the list - */ - list_for_each_entry_continue(parse, &memtype_list, nd) { - if (end <= parse->start) { - break; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - } - - if (err) { - break; + } else if (start < entry->end) { /* start > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + where = &entry->nd; } - - pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", - saved_ptr->start, saved_ptr->end); - /* No conflict. Go ahead and add this new entry */ - list_add(&new_entry->nd, &saved_ptr->nd); - new_entry = NULL; break; } } if (err) { - printk(KERN_INFO - "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", - start, end, cattr_name(new_entry->type), - cattr_name(req_type)); - kfree(new_entry); + printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " + "track %s, req %s\n", + start, end, cattr_name(new->type), cattr_name(req_type)); + kfree(new); spin_unlock(&memtype_lock); return err; } - if (new_entry) { - /* No conflict. Not yet added to the list. Add to the tail */ - list_add_tail(&new_entry->nd, &memtype_list); - pr_debug("New Entry\n"); - } - - if (ret_type) { - pr_debug( - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", - start, end, cattr_name(actual_type), - cattr_name(req_type), cattr_name(*ret_type)); - } else { - pr_debug( - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", - start, end, cattr_name(actual_type), - cattr_name(req_type)); - } + if (where) + list_add(&new->nd, where); + else + list_add_tail(&new->nd, &memtype_list); spin_unlock(&memtype_lock); + + dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", + start, end, cattr_name(new->type), cattr_name(req_type), + new_type ? cattr_name(*new_type) : "-"); + return err; } int free_memtype(u64 start, u64 end) { - struct memtype *ml; + struct memtype *entry; int err = -EINVAL; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + if (!pat_enabled) return 0; - } /* Low ISA region is always mapped WB. No need to track */ - if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) { + if (is_ISA_range(start, end - 1)) return 0; - } spin_lock(&memtype_lock); - list_for_each_entry(ml, &memtype_list, nd) { - if (ml->start == start && ml->end == end) { - list_del(&ml->nd); - kfree(ml); + list_for_each_entry(entry, &memtype_list, nd) { + if (entry->start == start && entry->end == end) { + list_del(&entry->nd); + kfree(entry); err = 0; break; } @@ -452,7 +354,7 @@ int free_memtype(u64 start, u64 end) current->comm, current->pid, start, end); } - pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); return err; } @@ -521,12 +423,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * caching for the high addresses through the KEN pin, but * we maintain the tradition of paranoia in this code. */ - if (!pat_wc_enabled && - ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { + if (!pat_enabled && + !(boot_cpu_has(X86_FEATURE_MTRR) || + boot_cpu_has(X86_FEATURE_K6_MTRR) || + boot_cpu_has(X86_FEATURE_CYRIX_ARR) || + boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && + (pfn << PAGE_SHIFT) >= __pa(high_memory)) { flags = _PAGE_CACHE_UC; } #endif @@ -548,7 +450,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, return 0; if (pfn <= max_pfn_mapped && - ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { + ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { free_memtype(offset, offset + size); printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", @@ -586,4 +488,3 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) free_memtype(addr, addr + size); } - diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 50159764f694..557b2abceef8 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -2,6 +2,7 @@ #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/tlb.h> +#include <asm/fixmap.h> pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { @@ -65,12 +66,6 @@ static inline void pgd_list_del(pgd_t *pgd) static void pgd_ctor(void *p) { pgd_t *pgd = p; - unsigned long flags; - - /* Clear usermode parts of PGD */ - memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t)); - - spin_lock_irqsave(&pgd_lock, flags); /* If the pgd points to a shared pagetable level (either the ptes in non-PAE, or shared PMD in PAE), then just copy the @@ -90,8 +85,6 @@ static void pgd_ctor(void *p) /* list required to sync kernel mapping updates */ if (!SHARED_KERNEL_PMD) pgd_list_add(pgd); - - spin_unlock_irqrestore(&pgd_lock, flags); } static void pgd_dtor(void *pgd) @@ -119,6 +112,72 @@ static void pgd_dtor(void *pgd) #ifdef CONFIG_X86_PAE /* + * In PAE mode, we need to do a cr3 reload (=tlb flush) when + * updating the top-level pagetable entries to guarantee the + * processor notices the update. Since this is expensive, and + * all 4 top-level entries are used almost immediately in a + * new process's life, we just pre-populate them here. + * + * Also, if we're in a paravirt environment where the kernel pmd is + * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate + * and initialize the kernel pmds here. + */ +#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD + +void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + + /* Note: almost everything apart from _PAGE_PRESENT is + reserved at the pmd (PDPT) level. */ + set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); + + /* + * According to Intel App note "TLBs, Paging-Structure Caches, + * and Their Invalidation", April 2007, document 317080-001, + * section 8.1: in PAE mode we explicitly have to flush the + * TLB via cr3 if the top-level pgd is changed... + */ + if (mm == current->active_mm) + write_cr3(read_cr3()); +} +#else /* !CONFIG_X86_PAE */ + +/* No need to prepopulate any pagetable entries in non-PAE modes. */ +#define PREALLOCATED_PMDS 0 + +#endif /* CONFIG_X86_PAE */ + +static void free_pmds(pmd_t *pmds[]) +{ + int i; + + for(i = 0; i < PREALLOCATED_PMDS; i++) + if (pmds[i]) + free_page((unsigned long)pmds[i]); +} + +static int preallocate_pmds(pmd_t *pmds[]) +{ + int i; + bool failed = false; + + for(i = 0; i < PREALLOCATED_PMDS; i++) { + pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + if (pmd == NULL) + failed = true; + pmds[i] = pmd; + } + + if (failed) { + free_pmds(pmds); + return -ENOMEM; + } + + return 0; +} + +/* * Mop up any pmd pages which may still be attached to the pgd. * Normally they will be freed by munmap/exit_mmap, but any pmd we * preallocate which never got a corresponding vma will need to be @@ -128,7 +187,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) { int i; - for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { + for(i = 0; i < PREALLOCATED_PMDS; i++) { pgd_t pgd = pgdp[i]; if (pgd_val(pgd) != 0) { @@ -142,32 +201,17 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) } } -/* - * In PAE mode, we need to do a cr3 reload (=tlb flush) when - * updating the top-level pagetable entries to guarantee the - * processor notices the update. Since this is expensive, and - * all 4 top-level entries are used almost immediately in a - * new process's life, we just pre-populate them here. - * - * Also, if we're in a paravirt environment where the kernel pmd is - * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate - * and initialize the kernel pmds here. - */ -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) +static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) { pud_t *pud; unsigned long addr; int i; pud = pud_offset(pgd, 0); - for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; - i++, pud++, addr += PUD_SIZE) { - pmd_t *pmd = pmd_alloc_one(mm, addr); - if (!pmd) { - pgd_mop_up_pmds(mm, pgd); - return 0; - } + for (addr = i = 0; i < PREALLOCATED_PMDS; + i++, pud++, addr += PUD_SIZE) { + pmd_t *pmd = pmds[i]; if (i >= KERNEL_PGD_BOUNDARY) memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), @@ -175,61 +219,54 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) pud_populate(mm, pud, pmd); } - - return 1; } -void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) +pgd_t *pgd_alloc(struct mm_struct *mm) { - paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + pgd_t *pgd; + pmd_t *pmds[PREALLOCATED_PMDS]; + unsigned long flags; - /* Note: almost everything apart from _PAGE_PRESENT is - reserved at the pmd (PDPT) level. */ - set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); + pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - /* - * According to Intel App note "TLBs, Paging-Structure Caches, - * and Their Invalidation", April 2007, document 317080-001, - * section 8.1: in PAE mode we explicitly have to flush the - * TLB via cr3 if the top-level pgd is changed... - */ - if (mm == current->active_mm) - write_cr3(read_cr3()); -} -#else /* !CONFIG_X86_PAE */ -/* No need to prepopulate any pagetable entries in non-PAE modes. */ -static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) -{ - return 1; -} + if (pgd == NULL) + goto out; -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd) -{ -} -#endif /* CONFIG_X86_PAE */ + mm->pgd = pgd; -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (preallocate_pmds(pmds) != 0) + goto out_free_pgd; - /* so that alloc_pmd can use it */ - mm->pgd = pgd; - if (pgd) - pgd_ctor(pgd); + if (paravirt_pgd_alloc(mm) != 0) + goto out_free_pmds; - if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { - pgd_dtor(pgd); - free_page((unsigned long)pgd); - pgd = NULL; - } + /* + * Make sure that pre-populating the pmds is atomic with + * respect to anything walking the pgd_list, so that they + * never see a partially populated pgd. + */ + spin_lock_irqsave(&pgd_lock, flags); + + pgd_ctor(pgd); + pgd_prepopulate_pmd(mm, pgd, pmds); + + spin_unlock_irqrestore(&pgd_lock, flags); return pgd; + +out_free_pmds: + free_pmds(pmds); +out_free_pgd: + free_page((unsigned long)pgd); +out: + return NULL; } void pgd_free(struct mm_struct *mm, pgd_t *pgd) { pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); + paravirt_pgd_free(mm, pgd); free_page((unsigned long)pgd); } @@ -255,7 +292,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, if (pte_young(*ptep)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, - &ptep->pte); + (unsigned long *) &ptep->pte); if (ret) pte_update(vma->vm_mm, addr, ptep); @@ -274,3 +311,22 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, return young; } + +int fixmaps_set; + +void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + set_pte_vaddr(address, pte); + fixmaps_set++; +} + +void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) +{ + __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); +} diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 369cf065b6a4..b4becbf8c570 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -71,7 +71,7 @@ void show_mem(void) * Associate a virtual page frame with a given physical page frame * and protection flags for that frame. */ -static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) +void set_pte_vaddr(unsigned long vaddr, pte_t pteval) { pgd_t *pgd; pud_t *pud; @@ -94,8 +94,8 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) return; } pte = pte_offset_kernel(pmd, vaddr); - if (pgprot_val(flags)) - set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags)); + if (pte_val(pteval)) + set_pte_present(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); @@ -141,22 +141,9 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) __flush_tlb_one(vaddr); } -static int fixmaps; unsigned long __FIXADDR_TOP = 0xfffff000; EXPORT_SYMBOL(__FIXADDR_TOP); -void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); - fixmaps++; -} - /** * reserve_top_address - reserves a hole in the top of kernel address space * @reserve - size of hole to reserve @@ -164,11 +151,44 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) * Can be used to relocate the fixmap area and poke a hole in the top * of kernel address space to make room for a hypervisor. */ -void reserve_top_address(unsigned long reserve) +void __init reserve_top_address(unsigned long reserve) { - BUG_ON(fixmaps > 0); + BUG_ON(fixmaps_set > 0); printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", (int)-reserve); __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; } + +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the + * vmalloc area - the default is 128m. + */ +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; + + __VMALLOC_RESERVE = memparse(arg, &arg); + return 0; +} +early_param("vmalloc", parse_vmalloc); + +/* + * reservetop=size reserves a hole at the top of the kernel address space which + * a hypervisor can load into later. Needed for dynamically loaded hypervisors, + * so relocating the fixmap can be done before paging initialization. + */ +static int __init parse_reservetop(char *arg) +{ + unsigned long address; + + if (!arg) + return -EINVAL; + + address = memparse(arg, &arg); + reserve_top_address(address); + return 0; +} +early_param("reservetop", parse_reservetop); diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/mm/srat_32.c index 70e4a374b4e8..f41d67f8f831 100644 --- a/arch/x86/kernel/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -31,6 +31,7 @@ #include <asm/srat.h> #include <asm/topology.h> #include <asm/smp.h> +#include <asm/e820.h> /* * proximity macros and definitions @@ -41,7 +42,7 @@ #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) /* bitmap length; _PXM is at most 255 */ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) -static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ +static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ #define MAX_CHUNKS_PER_NODE 3 #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) @@ -52,16 +53,37 @@ struct node_memory_chunk_s { u8 nid; // which cnode contains this chunk? u8 bank; // which mem bank on this node }; -static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; +static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; -static int num_memory_chunks; /* total number of memory chunks */ +static int __initdata num_memory_chunks; /* total number of memory chunks */ static u8 __initdata apicid_to_pxm[MAX_APICID]; +int numa_off __initdata; +int acpi_numa __initdata; + +static __init void bad_srat(void) +{ + printk(KERN_ERR "SRAT: SRAT not used.\n"); + acpi_numa = -1; + num_memory_chunks = 0; +} + +static __init inline int srat_disabled(void) +{ + return numa_off || acpi_numa < 0; +} + /* Identify CPU proximity domains */ -static void __init parse_cpu_affinity_structure(char *p) +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) { - struct acpi_srat_cpu_affinity *cpu_affinity = - (struct acpi_srat_cpu_affinity *) p; + if (srat_disabled()) + return; + if (cpu_affinity->header.length != + sizeof(struct acpi_srat_cpu_affinity)) { + bad_srat(); + return; + } if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) return; /* empty entry */ @@ -71,7 +93,7 @@ static void __init parse_cpu_affinity_structure(char *p) apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; - printk("CPU 0x%02X in proximity domain 0x%02X\n", + printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo); } @@ -79,14 +101,21 @@ static void __init parse_cpu_affinity_structure(char *p) * Identify memory proximity domains and hot-remove capabilities. * Fill node memory chunk list structure. */ -static void __init parse_memory_affinity_structure (char *sratp) +void __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) { unsigned long long paddr, size; unsigned long start_pfn, end_pfn; u8 pxm; struct node_memory_chunk_s *p, *q, *pend; - struct acpi_srat_mem_affinity *memory_affinity = - (struct acpi_srat_mem_affinity *) sratp; + + if (srat_disabled()) + return; + if (memory_affinity->header.length != + sizeof(struct acpi_srat_mem_affinity)) { + bad_srat(); + return; + } if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) return; /* empty entry */ @@ -105,7 +134,8 @@ static void __init parse_memory_affinity_structure (char *sratp) if (num_memory_chunks >= MAXCHUNKS) { - printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n", + printk(KERN_WARNING "Too many mem chunks in SRAT." + " Ignoring %lld MBytes at %llx\n", size/(1024*1024), paddr); return; } @@ -126,7 +156,8 @@ static void __init parse_memory_affinity_structure (char *sratp) num_memory_chunks++; - printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n", + printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)" + " in proximity domain %02x %s\n", start_pfn, end_pfn, memory_affinity->memory_type, pxm, @@ -134,6 +165,14 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } +/* Callback for SLIT parsing */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ +} + +void acpi_numa_arch_fixup(void) +{ +} /* * The SRAT table always lists ascending addresses, so can always * assume that the first "start" address that you see is the real @@ -149,7 +188,7 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c * *possible* memory hotplug areas the same as normal RAM. */ if (memory_chunk->start_pfn >= max_pfn) { - printk (KERN_INFO "Ignoring SRAT pfns: 0x%08lx -> %08lx\n", + printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", memory_chunk->start_pfn, memory_chunk->end_pfn); return; } @@ -166,42 +205,17 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } -/* Parse the ACPI Static Resource Affinity Table */ -static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) +int __init get_memcfg_from_srat(void) { - u8 *start, *end, *p; int i, j, nid; - start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ - p = start; - end = (u8 *)sratp + sratp->header.length; - memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ - memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); - - num_memory_chunks = 0; - while (p < end) { - switch (*p) { - case ACPI_SRAT_TYPE_CPU_AFFINITY: - parse_cpu_affinity_structure(p); - break; - case ACPI_SRAT_TYPE_MEMORY_AFFINITY: - parse_memory_affinity_structure(p); - break; - default: - printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]); - break; - } - p += p[1]; - if (p[1] == 0) { - printk("acpi20_parse_srat: Entry length value is zero;" - " can't parse any further!\n"); - break; - } - } + if (srat_disabled()) + goto out_fail; if (num_memory_chunks == 0) { - printk("could not finy any ACPI SRAT memory areas.\n"); + printk(KERN_WARNING + "could not finy any ACPI SRAT memory areas.\n"); goto out_fail; } @@ -228,131 +242,39 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) for (i = 0; i < num_memory_chunks; i++) node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); - printk("pxm bitmap: "); + printk(KERN_DEBUG "pxm bitmap: "); for (i = 0; i < sizeof(pxm_bitmap); i++) { - printk("%02X ", pxm_bitmap[i]); + printk(KERN_CONT "%02x ", pxm_bitmap[i]); } - printk("\n"); - printk("Number of logical nodes in system = %d\n", num_online_nodes()); - printk("Number of memory chunks in system = %d\n", num_memory_chunks); + printk(KERN_CONT "\n"); + printk(KERN_DEBUG "Number of logical nodes in system = %d\n", + num_online_nodes()); + printk(KERN_DEBUG "Number of memory chunks in system = %d\n", + num_memory_chunks); for (i = 0; i < MAX_APICID; i++) apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); for (j = 0; j < num_memory_chunks; j++){ struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; - printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", + printk(KERN_DEBUG + "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", j, chunk->nid, chunk->start_pfn, chunk->end_pfn); node_read_chunk(chunk->nid, chunk); - add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); + e820_register_active_regions(chunk->nid, chunk->start_pfn, + min(chunk->end_pfn, max_pfn)); } - + for_each_online_node(nid) { unsigned long start = node_start_pfn[nid]; - unsigned long end = node_end_pfn[nid]; + unsigned long end = min(node_end_pfn[nid], max_pfn); memory_present(nid, start, end); node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); } return 1; out_fail: - return 0; -} - -struct acpi_static_rsdt { - struct acpi_table_rsdt table; - u32 padding[7]; /* Allow for 7 more table entries */ -}; - -int __init get_memcfg_from_srat(void) -{ - struct acpi_table_header *header = NULL; - struct acpi_table_rsdp *rsdp = NULL; - struct acpi_table_rsdt *rsdt = NULL; - acpi_native_uint rsdp_address = 0; - struct acpi_static_rsdt saved_rsdt; - int tables = 0; - int i = 0; - - rsdp_address = acpi_os_get_root_pointer(); - if (!rsdp_address) { - printk("%s: System description tables not found\n", - __func__); - goto out_err; - } - - printk("%s: assigning address to rsdp\n", __func__); - rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address; - if (!rsdp) { - printk("%s: Didn't find ACPI root!\n", __func__); - goto out_err; - } - - printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision, - rsdp->oem_id); - - if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) { - printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__); - goto out_err; - } - - rsdt = (struct acpi_table_rsdt *) - early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt)); - - if (!rsdt) { - printk(KERN_WARNING - "%s: ACPI: Invalid root system description tables (RSDT)\n", - __func__); - goto out_err; - } - - header = &rsdt->header; - - if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) { - printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); - goto out_err; - } - - /* - * The number of tables is computed by taking the - * size of all entries (header size minus total - * size of RSDT) divided by the size of each entry - * (4-byte table pointers). - */ - tables = (header->length - sizeof(struct acpi_table_header)) / 4; - - if (!tables) - goto out_err; - - memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); - - if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) { - printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", - saved_rsdt.table.header.length); - goto out_err; - } - - printk("Begin SRAT table scan....\n"); - - for (i = 0; i < tables; i++) { - /* Map in header, then map in full table length. */ - header = (struct acpi_table_header *) - early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); - if (!header) - break; - header = (struct acpi_table_header *) - early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length); - if (!header) - break; - - if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) - continue; - - /* we've found the srat table. don't need to look at any more tables */ - return acpi20_parse_srat((struct acpi_table_srat *)header); - } -out_err: - remove_all_active_ranges(); - printk("failed to get NUMA memory information from SRAT table\n"); + printk(KERN_ERR "failed to get NUMA memory information from SRAT" + " table\n"); return 0; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 99649dccad28..0fd67b81a8b6 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -299,7 +299,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) pxmram = 0; } - e820ram = end_pfn - absent_pages_in_range(0, end_pfn); + e820ram = max_pfn - absent_pages_in_range(0, max_pfn); /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ if ((long)(e820ram - pxmram) >= 1*1024*1024) { printk(KERN_ERR @@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (node == NUMA_NO_NODE) continue; if (!node_isset(node, node_possible_map)) - numa_set_node(i, NUMA_NO_NODE); + numa_clear_node(i); } numa_init_array(); return 0; @@ -495,6 +495,7 @@ int __node_distance(int a, int b) EXPORT_SYMBOL(__node_distance); +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) int memory_add_physaddr_to_nid(u64 start) { int i, ret = 0; @@ -506,4 +507,4 @@ int memory_add_physaddr_to_nid(u64 start) return ret; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); - +#endif diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3fde545..2b6ad5b9f9d5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -269,12 +269,13 @@ static void nmi_cpu_shutdown(void *dummy) static void nmi_shutdown(void) { - struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs *msrs = &get_cpu_var(cpu_msrs); nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); unregister_die_notifier(&profile_exceptions_nb); model->shutdown(msrs); free_msrs(); + put_cpu_var(cpu_msrs); } static void nmi_cpu_start(void *dummy) diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 89ec35d00efd..a34fbf557926 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -13,12 +13,14 @@ pci-y := fixup.o pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o -# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are +# Careful: VISWS overrule the pci-y above. The colons are # therefor correct. This needs a proper fix by distangling the code. pci-$(CONFIG_X86_VISWS) := visws.o fixup.o -pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o + +pci-$(CONFIG_X86_NUMAQ) += numa.o # Necessary for NUMAQ as well pci-$(CONFIG_NUMA) += mp_bus_to_node.o obj-y += $(pci-y) common.o early.o +obj-y += amd_bus.o diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 8fbd19832cf6..fd47068c95de 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-y += k8-bus_64.o +obj-y += amd_bus.o diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d95de2f199cd..4fa52d3dc848 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -171,8 +171,11 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (node != -1) set_mp_bus_to_node(busnum, node); else - node = get_mp_bus_to_node(busnum); #endif + node = get_mp_bus_to_node(busnum); + + if (node != -1 && !node_online(node)) + node = -1; /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. @@ -204,21 +207,22 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (!bus) kfree(sd); + if (bus && node != -1) { #ifdef CONFIG_ACPI_NUMA - if (bus) { - if (pxm >= 0) { + if (pxm >= 0) printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", - busnum, pxm, pxm_to_node(pxm)); - } - } + busnum, pxm, node); +#else + printk(KERN_DEBUG "bus %02x -> node %d\n", + busnum, node); #endif + } if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); return bus; } -extern int pci_routeirq; static int __init pci_acpi_init(void) { struct pci_dev *dev = NULL; diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/amd_bus.c index 5c2799c20e47..d02c598451ec 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/amd_bus.c @@ -1,5 +1,9 @@ #include <linux/init.h> #include <linux/pci.h> +#include "pci.h" + +#ifdef CONFIG_X86_64 + #include <asm/pci-direct.h> #include <asm/mpspec.h> #include <linux/cpumask.h> @@ -384,7 +388,7 @@ static int __init early_fill_mp_bus_info(void) /* need to take out [0, TOM) for RAM*/ address = MSR_K8_TOP_MEM1; rdmsrl(address, val); - end = (val & 0xffffff8000000ULL); + end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); if (end < (1ULL<<32)) update_range(range, 0, end - 1); @@ -478,7 +482,7 @@ static int __init early_fill_mp_bus_info(void) /* TOP_MEM2 */ address = MSR_K8_TOP_MEM2; rdmsrl(address, val); - end = (val & 0xffffff8000000ULL); + end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); update_range(range, 1ULL<<32, end - 1); } @@ -526,3 +530,31 @@ static int __init early_fill_mp_bus_info(void) } postcore_initcall(early_fill_mp_bus_info); + +#endif + +/* common 32/64 bit code */ + +#define ENABLE_CF8_EXT_CFG (1ULL << 46) + +static void enable_pci_io_ecs_per_cpu(void *unused) +{ + u64 reg; + rdmsrl(MSR_AMD64_NB_CFG, reg); + if (!(reg & ENABLE_CF8_EXT_CFG)) { + reg |= ENABLE_CF8_EXT_CFG; + wrmsrl(MSR_AMD64_NB_CFG, reg); + } +} + +static int __init enable_pci_io_ecs(void) +{ + /* assume all cpus from fam10h have IO ECS */ + if (boot_cpu_data.x86 < 0x10) + return 0; + on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1); + pci_probe |= PCI_HAS_IO_ECS; + return 0; +} + +postcore_initcall(enable_pci_io_ecs); diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 21d1e0e0d535..9915293500fb 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -8,18 +8,21 @@ #include "pci.h" /* - * Functions for accessing PCI configuration space with type 1 accesses + * Functions for accessing PCI base (first 256 bytes) and extended + * (4096 bytes per PCI function) configuration space with type 1 + * accesses. */ #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) + (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \ + | (devfn << 8) | (reg & 0xFC)) static int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) { + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { *value = -1; return -EINVAL; } @@ -50,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) + if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; spin_lock_irqsave(&pci_config_lock, flags); @@ -260,10 +263,18 @@ void __init pci_direct_init(int type) return; printk(KERN_INFO "PCI: Using configuration type %d for base access\n", type); - if (type == 1) + if (type == 1) { raw_pci_ops = &pci_direct_conf1; - else - raw_pci_ops = &pci_direct_conf2; + if (raw_pci_ext_ops) + return; + if (!(pci_probe & PCI_HAS_IO_ECS)) + return; + printk(KERN_INFO "PCI: Using configuration type 1 " + "for extended access\n"); + raw_pci_ext_ops = &pci_direct_conf1; + return; + } + raw_pci_ops = &pci_direct_conf2; } int __init pci_direct_probe(void) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 10fb308fded8..6ccd7a108cd4 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -299,9 +299,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; prot = pgprot_val(vma->vm_page_prot); - if (pat_wc_enabled && write_combine) + if (pat_enabled && write_combine) prot |= _PAGE_CACHE_WC; - else if (pat_wc_enabled || boot_cpu_data.x86 > 3) + else if (pat_enabled || boot_cpu_data.x86 > 3) /* * ioremap() and ioremap_nocache() defaults to UC MINUS for now. * To avoid attribute conflicts, request UC MINUS here diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index ca8df9c260bc..f0859de23e20 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -11,8 +11,8 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/dmi.h> -#include <asm/io.h> -#include <asm/smp.h> +#include <linux/io.h> +#include <linux/smp.h> #include <asm/io_apic.h> #include <linux/irq.h> #include <linux/acpi.h> @@ -61,7 +61,7 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; * and perform checksum verification. */ -static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) +static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) { struct irq_routing_table *rt; int i; @@ -74,7 +74,7 @@ static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) rt->size < sizeof(struct irq_routing_table)) return NULL; sum = 0; - for (i=0; i < rt->size; i++) + for (i = 0; i < rt->size; i++) sum += addr[i]; if (!sum) { DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); @@ -100,7 +100,7 @@ static struct irq_routing_table * __init pirq_find_routing_table(void) return rt; printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); } - for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { + for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { rt = pirq_check_routing_table(addr); if (rt) return rt; @@ -122,20 +122,20 @@ static void __init pirq_peer_trick(void) struct irq_info *e; memset(busmap, 0, sizeof(busmap)); - for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { + for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { e = &rt->slots[i]; #ifdef DEBUG { int j; DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); - for(j=0; j<4; j++) + for (j = 0; j < 4; j++) DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); DBG("\n"); } #endif busmap[e->bus] = 1; } - for(i = 1; i < 256; i++) { + for (i = 1; i < 256; i++) { int node; if (!busmap[i] || pci_find_bus(0, i)) continue; @@ -285,7 +285,7 @@ static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; WARN_ON_ONCE(pirq > 4); - return read_config_nybble(router,0x43, pirqmap[pirq-1]); + return read_config_nybble(router, 0x43, pirqmap[pirq-1]); } static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) @@ -314,7 +314,7 @@ static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, /* * Cyrix: nibble offset 0x5C - * 0x5C bits 7:4 is INTB bits 3:0 is INTA + * 0x5C bits 7:4 is INTB bits 3:0 is INTA * 0x5D bits 7:4 is INTD bits 3:0 is INTC */ static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) @@ -350,7 +350,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, * Apparently there are systems implementing PCI routing table using * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. * We try our best to handle both link mappings. - * + * * Currently (2003-05-21) it appears most SiS chipsets follow the * definition of routing registers from the SiS-5595 southbridge. * According to the SiS 5595 datasheets the revision id's of the @@ -370,7 +370,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, * * 0x62: USBIRQ: * bit 6 OHCI function disabled (0), enabled (1) - * + * * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved * * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved @@ -487,9 +487,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq u8 irq; irq = 0; if (pirq <= 4) - { irq = read_config_nybble(router, 0x56, pirq - 1); - } printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", dev->vendor, dev->device, pirq, irq); return irq; @@ -497,12 +495,10 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", dev->vendor, dev->device, pirq, irq); if (pirq <= 4) - { write_config_nybble(router, 0x56, pirq - 1, irq); - } return 1; } @@ -549,50 +545,49 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route if (pci_dev_present(pirq_440gx)) return 0; - switch(device) - { - case PCI_DEVICE_ID_INTEL_82371FB_0: - case PCI_DEVICE_ID_INTEL_82371SB_0: - case PCI_DEVICE_ID_INTEL_82371AB_0: - case PCI_DEVICE_ID_INTEL_82371MX: - case PCI_DEVICE_ID_INTEL_82443MX_0: - case PCI_DEVICE_ID_INTEL_82801AA_0: - case PCI_DEVICE_ID_INTEL_82801AB_0: - case PCI_DEVICE_ID_INTEL_82801BA_0: - case PCI_DEVICE_ID_INTEL_82801BA_10: - case PCI_DEVICE_ID_INTEL_82801CA_0: - case PCI_DEVICE_ID_INTEL_82801CA_12: - case PCI_DEVICE_ID_INTEL_82801DB_0: - case PCI_DEVICE_ID_INTEL_82801E_0: - case PCI_DEVICE_ID_INTEL_82801EB_0: - case PCI_DEVICE_ID_INTEL_ESB_1: - case PCI_DEVICE_ID_INTEL_ICH6_0: - case PCI_DEVICE_ID_INTEL_ICH6_1: - case PCI_DEVICE_ID_INTEL_ICH7_0: - case PCI_DEVICE_ID_INTEL_ICH7_1: - case PCI_DEVICE_ID_INTEL_ICH7_30: - case PCI_DEVICE_ID_INTEL_ICH7_31: - case PCI_DEVICE_ID_INTEL_ESB2_0: - case PCI_DEVICE_ID_INTEL_ICH8_0: - case PCI_DEVICE_ID_INTEL_ICH8_1: - case PCI_DEVICE_ID_INTEL_ICH8_2: - case PCI_DEVICE_ID_INTEL_ICH8_3: - case PCI_DEVICE_ID_INTEL_ICH8_4: - case PCI_DEVICE_ID_INTEL_ICH9_0: - case PCI_DEVICE_ID_INTEL_ICH9_1: - case PCI_DEVICE_ID_INTEL_ICH9_2: - case PCI_DEVICE_ID_INTEL_ICH9_3: - case PCI_DEVICE_ID_INTEL_ICH9_4: - case PCI_DEVICE_ID_INTEL_ICH9_5: - case PCI_DEVICE_ID_INTEL_TOLAPAI_0: - case PCI_DEVICE_ID_INTEL_ICH10_0: - case PCI_DEVICE_ID_INTEL_ICH10_1: - case PCI_DEVICE_ID_INTEL_ICH10_2: - case PCI_DEVICE_ID_INTEL_ICH10_3: - r->name = "PIIX/ICH"; - r->get = pirq_piix_get; - r->set = pirq_piix_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_INTEL_82371FB_0: + case PCI_DEVICE_ID_INTEL_82371SB_0: + case PCI_DEVICE_ID_INTEL_82371AB_0: + case PCI_DEVICE_ID_INTEL_82371MX: + case PCI_DEVICE_ID_INTEL_82443MX_0: + case PCI_DEVICE_ID_INTEL_82801AA_0: + case PCI_DEVICE_ID_INTEL_82801AB_0: + case PCI_DEVICE_ID_INTEL_82801BA_0: + case PCI_DEVICE_ID_INTEL_82801BA_10: + case PCI_DEVICE_ID_INTEL_82801CA_0: + case PCI_DEVICE_ID_INTEL_82801CA_12: + case PCI_DEVICE_ID_INTEL_82801DB_0: + case PCI_DEVICE_ID_INTEL_82801E_0: + case PCI_DEVICE_ID_INTEL_82801EB_0: + case PCI_DEVICE_ID_INTEL_ESB_1: + case PCI_DEVICE_ID_INTEL_ICH6_0: + case PCI_DEVICE_ID_INTEL_ICH6_1: + case PCI_DEVICE_ID_INTEL_ICH7_0: + case PCI_DEVICE_ID_INTEL_ICH7_1: + case PCI_DEVICE_ID_INTEL_ICH7_30: + case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_ESB2_0: + case PCI_DEVICE_ID_INTEL_ICH8_0: + case PCI_DEVICE_ID_INTEL_ICH8_1: + case PCI_DEVICE_ID_INTEL_ICH8_2: + case PCI_DEVICE_ID_INTEL_ICH8_3: + case PCI_DEVICE_ID_INTEL_ICH8_4: + case PCI_DEVICE_ID_INTEL_ICH9_0: + case PCI_DEVICE_ID_INTEL_ICH9_1: + case PCI_DEVICE_ID_INTEL_ICH9_2: + case PCI_DEVICE_ID_INTEL_ICH9_3: + case PCI_DEVICE_ID_INTEL_ICH9_4: + case PCI_DEVICE_ID_INTEL_ICH9_5: + case PCI_DEVICE_ID_INTEL_TOLAPAI_0: + case PCI_DEVICE_ID_INTEL_ICH10_0: + case PCI_DEVICE_ID_INTEL_ICH10_1: + case PCI_DEVICE_ID_INTEL_ICH10_2: + case PCI_DEVICE_ID_INTEL_ICH10_3: + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; } return 0; } @@ -606,7 +601,7 @@ static __init int via_router_probe(struct irq_router *r, * workarounds for some buggy BIOSes */ if (device == PCI_DEVICE_ID_VIA_82C586_0) { - switch(router->device) { + switch (router->device) { case PCI_DEVICE_ID_VIA_82C686: /* * Asus k7m bios wrongly reports 82C686A @@ -631,7 +626,7 @@ static __init int via_router_probe(struct irq_router *r, } } - switch(device) { + switch (device) { case PCI_DEVICE_ID_VIA_82C586_0: r->name = "VIA"; r->get = pirq_via586_get; @@ -654,13 +649,12 @@ static __init int via_router_probe(struct irq_router *r, static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_VLSI_82C534: - r->name = "VLSI 82C534"; - r->get = pirq_vlsi_get; - r->set = pirq_vlsi_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_VLSI_82C534: + r->name = "VLSI 82C534"; + r->get = pirq_vlsi_get; + r->set = pirq_vlsi_set; + return 1; } return 0; } @@ -668,14 +662,13 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_SERVERWORKS_OSB4: - case PCI_DEVICE_ID_SERVERWORKS_CSB5: - r->name = "ServerWorks"; - r->get = pirq_serverworks_get; - r->set = pirq_serverworks_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_SERVERWORKS_OSB4: + case PCI_DEVICE_ID_SERVERWORKS_CSB5: + r->name = "ServerWorks"; + r->get = pirq_serverworks_get; + r->set = pirq_serverworks_set; + return 1; } return 0; } @@ -684,7 +677,7 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, { if (device != PCI_DEVICE_ID_SI_503) return 0; - + r->name = "SIS"; r->get = pirq_sis_get; r->set = pirq_sis_set; @@ -693,47 +686,43 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_CYRIX_5520: - r->name = "NatSemi"; - r->get = pirq_cyrix_get; - r->set = pirq_cyrix_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_CYRIX_5520: + r->name = "NatSemi"; + r->get = pirq_cyrix_get; + r->set = pirq_cyrix_set; + return 1; } return 0; } static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_OPTI_82C700: - r->name = "OPTI"; - r->get = pirq_opti_get; - r->set = pirq_opti_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_OPTI_82C700: + r->name = "OPTI"; + r->get = pirq_opti_get; + r->set = pirq_opti_set; + return 1; } return 0; } static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_ITE_IT8330G_0: - r->name = "ITE"; - r->get = pirq_ite_get; - r->set = pirq_ite_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_ITE_IT8330G_0: + r->name = "ITE"; + r->get = pirq_ite_get; + r->set = pirq_ite_set; + return 1; } return 0; } static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { + switch (device) { case PCI_DEVICE_ID_AL_M1533: case PCI_DEVICE_ID_AL_M1563: printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); @@ -747,25 +736,24 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_AMD_VIPER_740B: - r->name = "AMD756"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7413: - r->name = "AMD766"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7443: - r->name = "AMD768"; - break; - default: - return 0; + switch (device) { + case PCI_DEVICE_ID_AMD_VIPER_740B: + r->name = "AMD756"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7413: + r->name = "AMD766"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7443: + r->name = "AMD768"; + break; + default: + return 0; } r->get = pirq_amd756_get; r->set = pirq_amd756_set; return 1; } - + static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { switch (device) { @@ -807,7 +795,7 @@ static struct pci_dev *pirq_router_dev; * FIXME: should we have an option to say "generic for * chipset" ? */ - + static void __init pirq_find_router(struct irq_router *r) { struct irq_routing_table *rt = pirq_table; @@ -826,7 +814,7 @@ static void __init pirq_find_router(struct irq_router *r) r->name = "default"; r->get = NULL; r->set = NULL; - + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", rt->rtr_vendor, rt->rtr_device); @@ -837,7 +825,7 @@ static void __init pirq_find_router(struct irq_router *r) return; } - for( h = pirq_routers; h->vendor; h++) { + for (h = pirq_routers; h->vendor; h++) { /* First look for a router match */ if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) break; @@ -889,7 +877,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!pirq_table) return 0; - + DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); info = pirq_get_info(dev); if (!info) { @@ -928,8 +916,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) */ newirq = dev->irq; if (newirq && !((1 << newirq) & mask)) { - if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; - else printk("\n" KERN_WARNING + if (pci_probe & PCI_USE_PIRQ_MASK) + newirq = 0; + else + printk("\n" KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask " "- try pci=usepirqmask\n" KERN_DEBUG, newirq, pci_name(dev)); @@ -949,8 +939,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) irq = pirq & 0xf; DBG(" -> hardcoded IRQ %d\n", irq); msg = "Hardcoded"; - } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ - ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { + } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ + ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { DBG(" -> got IRQ %d\n", irq); msg = "Found"; eisa_set_level_irq(irq); @@ -985,15 +975,15 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) continue; if (info->irq[pin].link == pirq) { /* We refuse to override the dev->irq information. Give a warning! */ - if ( dev2->irq && dev2->irq != irq && \ + if (dev2->irq && dev2->irq != irq && \ (!(pci_probe & PCI_USE_PIRQ_MASK) || \ - ((1 << dev2->irq) & mask)) ) { + ((1 << dev2->irq) & mask))) { #ifndef CONFIG_PCI_MSI - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", + printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", pci_name(dev2), dev2->irq, irq); #endif - continue; - } + continue; + } dev2->irq = irq; pirq_penalty[irq]++; if (dev != dev2) @@ -1031,8 +1021,7 @@ static void __init pcibios_fixup_irqs(void) /* * Recalculate IRQ numbers if we use the I/O APIC. */ - if (io_apic_assign_pci_irqs) - { + if (io_apic_assign_pci_irqs) { int irq; if (pin) { @@ -1045,10 +1034,10 @@ static void __init pcibios_fixup_irqs(void) * busses itself so we should get into this branch reliably. */ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; + struct pci_dev *bridge = dev->bus->self; pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", @@ -1138,7 +1127,7 @@ static int __init pcibios_irq_init(void) pirq_find_router(&pirq_router); if (pirq_table->exclusive_irqs) { int i; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (!(pirq_table->exclusive_irqs & (1 << i))) pirq_penalty[i] += 100; } @@ -1203,10 +1192,10 @@ static int pirq_enable_irq(struct pci_dev *dev) */ temp_dev = dev; while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; + struct pci_dev *bridge = dev->bus->self; pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 0cfebecf2a8f..23faaa890ffc 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -374,7 +374,7 @@ reject: static int __initdata known_bridge; -void __init __pci_mmcfg_init(int early) +static void __init __pci_mmcfg_init(int early) { /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c index d9afbae5092b..99f1ecd485b5 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numa.c @@ -6,45 +6,21 @@ #include <linux/init.h> #include <linux/nodemask.h> #include <mach_apic.h> +#include <asm/mpspec.h> #include "pci.h" #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ -int mp_bus_id_to_node[MAX_MP_BUSSES]; #define BUS2QUAD(global) (mp_bus_id_to_node[global]) -int mp_bus_id_to_local[MAX_MP_BUSSES]; #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) -void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - mp_bus_id_to_node[m->mpc_busid] = quad; - mp_bus_id_to_local[m->mpc_busid] = local; - printk(KERN_INFO "Bus #%d is %s (node %d)\n", - m->mpc_busid, name, quad); -} - -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) -void mpc_oem_pci_bus(struct mpc_config_bus *m, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; -} /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -#ifdef CONFIG_X86_NUMAQ EXPORT_SYMBOL(xquad_portio); -#endif #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) @@ -179,6 +155,9 @@ static int __init pci_numa_init(void) { int quad; + if (!found_numaq) + return 0; + raw_pci_ops = &pci_direct_conf1_mq; if (pcibios_scanned++) diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 720c4c554534..ba263e626a68 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -27,6 +27,7 @@ #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 #define PCI_USE__CRS 0x10000 #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 +#define PCI_HAS_IO_ECS 0x40000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index b542355e0e34..6dd000dd7933 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -83,7 +83,7 @@ static int set_up_temporary_mappings(void) /* Set up the direct mapping from scratch */ start = (unsigned long)pfn_to_kaddr(0); - end = (unsigned long)pfn_to_kaddr(end_pfn); + end = (unsigned long)pfn_to_kaddr(max_pfn); for (; start < end; start = next) { pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 3fdd51497a83..19a6cfaf5db9 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -16,7 +16,7 @@ #include "vextern.h" /* Just for VMAGIC. */ #undef VEXTERN -int vdso_enabled = 1; +unsigned int __read_mostly vdso_enabled = 1; extern char vdso_start[], vdso_end[]; extern unsigned short vdso_sync_cpuid; diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 6c388e593bc8..c2cc99580871 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -12,3 +12,13 @@ config XEN This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the Xen hypervisor. + +config XEN_MAX_DOMAIN_MEMORY + int "Maximum allowed size of a domain in gigabytes" + default 8 + depends on XEN + help + The pseudo-physical to machine address array is sized + according to the maximum possible memory size of a Xen + domain. This array uses 1 page per gigabyte, so there's no + need to be too stingy here.
\ No newline at end of file diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3d8df981d5fd..2ba2d1649131 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o manage.o xen-asm.o grant-table.o + time.o xen-asm.o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f09c1c69c37a..dcd4e51f2f16 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -45,6 +45,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/reboot.h> +#include <asm/pgalloc.h> #include "xen-ops.h" #include "mmu.h" @@ -75,13 +76,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); -static /* __initdata */ struct shared_info dummy_shared_info; +struct shared_info xen_dummy_shared_info; /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. */ -struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; +struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; /* * Flag to determine whether vcpu info placement is available on all @@ -98,13 +99,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; */ static int have_vcpu_info_placement = 1; -static void __init xen_vcpu_setup(int cpu) +static void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; struct vcpu_info *vcpup; - BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); + BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; if (!have_vcpu_info_placement) @@ -136,11 +137,41 @@ static void __init xen_vcpu_setup(int cpu) } } +/* + * On restore, set the vcpu placement up again. + * If it fails, then we're in a bad state, since + * we can't back out from using it... + */ +void xen_vcpu_restore(void) +{ + if (have_vcpu_info_placement) { + int cpu; + + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); + + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); + + xen_vcpu_setup(cpu); + + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); + } + + BUG_ON(!have_vcpu_info_placement); + } +} + static void __init xen_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", pv_info.name); - printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); + printk(KERN_INFO "Hypervisor signature: %s%s\n", + xen_start_info->magic, + xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } static void xen_cpuid(unsigned int *ax, unsigned int *bx, @@ -235,13 +266,13 @@ static void xen_irq_enable(void) { struct vcpu_info *vcpu; - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); + /* We don't need to worry about being preempted here, since + either a) interrupts are disabled, so no preemption, or b) + the caller is confused and is trying to re-enable interrupts + on an indeterminate processor. */ + vcpu = x86_read_percpu(xen_vcpu); vcpu->evtchn_upcall_mask = 0; - preempt_enable_no_resched(); /* Doesn't matter if we get preempted here, because any pending event will get dealt with anyway. */ @@ -254,7 +285,7 @@ static void xen_irq_enable(void) static void xen_safe_halt(void) { /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) + if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) BUG(); } @@ -607,6 +638,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, xen_mc_issue(PARAVIRT_LAZY_MMU); } +static void xen_clts(void) +{ + struct multicall_space mcs; + + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_write_cr0(unsigned long cr0) +{ + struct multicall_space mcs; + + /* Only pay attention to cr0.TS; everything else is + ignored. */ + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + static void xen_write_cr2(unsigned long cr2) { x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; @@ -624,8 +679,10 @@ static unsigned long xen_read_cr2_direct(void) static void xen_write_cr4(unsigned long cr4) { - /* Just ignore cr4 changes; Xen doesn't allow us to do - anything anyway. */ + cr4 &= ~X86_CR4_PGE; + cr4 &= ~X86_CR4_PSE; + + native_write_cr4(cr4); } static unsigned long xen_read_cr3(void) @@ -831,7 +888,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) PFN_DOWN(__pa(xen_start_info->pt_base))); } -static __init void setup_shared_info(void) +void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); @@ -854,6 +911,8 @@ static __init void setup_shared_info(void) /* In UP this is as good a place as any to set up shared info */ xen_setup_vcpu_info_placement(); #endif + + xen_setup_mfn_list_list(); } static __init void xen_pagetable_setup_done(pgd_t *base) @@ -866,15 +925,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; - setup_shared_info(); + xen_setup_shared_info(); /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); } +static __init void xen_post_allocator_init(void) +{ + pv_mmu_ops.set_pmd = xen_set_pmd; + pv_mmu_ops.set_pud = xen_set_pud; + + xen_mark_init_mm_pinned(); +} + /* This is called once we have the cpu_possible_map */ -void __init xen_setup_vcpu_info_placement(void) +void xen_setup_vcpu_info_placement(void) { int cpu; @@ -947,6 +1014,33 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } +static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) +{ + pte_t pte; + + phys >>= PAGE_SHIFT; + + switch (idx) { + case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: +#ifdef CONFIG_X86_F00F_BUG + case FIX_F00F_IDT: +#endif + case FIX_WP_TEST: + case FIX_VDSO: +#ifdef CONFIG_X86_LOCAL_APIC + case FIX_APIC_BASE: /* maps dummy local APIC */ +#endif + pte = pfn_pte(phys, prot); + break; + + default: + pte = mfn_pte(phys, prot); + break; + } + + __native_set_fixmap(idx, pte); +} + static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, @@ -960,7 +1054,7 @@ static const struct pv_init_ops xen_init_ops __initdata = { .banner = xen_banner, .memory_setup = xen_memory_setup, .arch_setup = xen_arch_setup, - .post_allocator_init = xen_mark_init_mm_pinned, + .post_allocator_init = xen_post_allocator_init, }; static const struct pv_time_ops xen_time_ops __initdata = { @@ -968,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = { .set_wallclock = xen_set_wallclock, .get_wallclock = xen_get_wallclock, - .get_cpu_khz = xen_cpu_khz, + .get_tsc_khz = xen_tsc_khz, .sched_clock = xen_sched_clock, }; @@ -978,10 +1072,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .set_debugreg = xen_set_debugreg, .get_debugreg = xen_get_debugreg, - .clts = native_clts, + .clts = xen_clts, .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, + .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, @@ -995,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .read_pmc = native_read_pmc, .iret = xen_iret, - .irq_enable_syscall_ret = xen_sysexit, + .irq_enable_sysexit = xen_sysexit, .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, @@ -1029,6 +1123,9 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { .irq_enable = xen_irq_enable, .safe_halt = xen_safe_halt, .halt = xen_halt, +#ifdef CONFIG_X86_64 + .adjust_exception_frame = paravirt_nop, +#endif }; static const struct pv_apic_ops xen_apic_ops __initdata = { @@ -1060,6 +1157,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, + .pgd_alloc = __paravirt_pgd_alloc, + .pgd_free = paravirt_nop, + .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, .alloc_pmd = xen_alloc_pte_init, @@ -1072,9 +1172,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pte = NULL, /* see xen_pagetable_setup_* */ .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd, + .set_pmd = xen_set_pmd_hyper, + + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, .pte_val = xen_pte_val, + .pte_flags = native_pte_val, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, @@ -1082,7 +1186,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, - .set_pud = xen_set_pud, + .set_pud = xen_set_pud_hyper, .pte_clear = xen_pte_clear, .pmd_clear = xen_pmd_clear, @@ -1097,6 +1201,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .enter = paravirt_enter_lazy_mmu, .leave = xen_leave_lazy, }, + + .set_fixmap = xen_set_fixmap, }; #ifdef CONFIG_SMP @@ -1114,11 +1220,13 @@ static const struct smp_ops xen_smp_ops __initdata = { static void xen_reboot(int reason) { + struct sched_shutdown r = { .reason = reason }; + #ifdef CONFIG_SMP smp_send_stop(); #endif - if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) + if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) BUG(); } @@ -1173,6 +1281,8 @@ asmlinkage void __init xen_start_kernel(void) BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); + xen_setup_features(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; @@ -1182,21 +1292,26 @@ asmlinkage void __init xen_start_kernel(void) pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { + pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; + pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; + } + machine_ops = xen_machine_ops; #ifdef CONFIG_SMP smp_ops = xen_smp_ops; #endif - xen_setup_features(); - /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) - phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; + xen_build_dynamic_phys_to_machine(); pgd = (pgd_t *)xen_start_info->pt_base; + init_pg_tables_start = __pa(pgd); init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT; init_mm.pgd = pgd; /* use the Xen pagetables to start */ @@ -1232,9 +1347,12 @@ asmlinkage void __init xen_start_kernel(void) ? __pa(xen_start_info->mod_start) : 0; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; - if (!is_initial_xendomain()) + if (!is_initial_xendomain()) { + add_preferred_console("xenboot", 0, NULL); + add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } /* Start the world */ - start_kernel(); + i386_start_kernel(); } diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c deleted file mode 100644 index aa7af9e6abc0..000000000000 --- a/arch/x86/xen/manage.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Handle extern requests for shutdown, reboot and sysrq - */ -#include <linux/kernel.h> -#include <linux/err.h> -#include <linux/reboot.h> -#include <linux/sysrq.h> - -#include <xen/xenbus.h> - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 - -/* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; - -static void shutdown_handler(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - char *str; - struct xenbus_transaction xbt; - int err; - - if (shutting_down != SHUTDOWN_INVALID) - return; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - - str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); - /* Ignore read errors and empty reads. */ - if (XENBUS_IS_ERR_READ(str)) { - xenbus_transaction_end(xbt, 1); - return; - } - - xenbus_write(xbt, "control", "shutdown", ""); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) { - kfree(str); - goto again; - } - - if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) - orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) - ctrl_alt_del(); - else { - printk(KERN_INFO "Ignoring shutdown request: %s\n", str); - shutting_down = SHUTDOWN_INVALID; - } - - kfree(str); -} - -static void sysrq_handler(struct xenbus_watch *watch, const char **vec, - unsigned int len) -{ - char sysrq_key = '\0'; - struct xenbus_transaction xbt; - int err; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { - printk(KERN_ERR "Unable to read sysrq code in " - "control/sysrq\n"); - xenbus_transaction_end(xbt, 1); - return; - } - - if (sysrq_key != '\0') - xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - - if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); -} - -static struct xenbus_watch shutdown_watch = { - .node = "control/shutdown", - .callback = shutdown_handler -}; - -static struct xenbus_watch sysrq_watch = { - .node = "control/sysrq", - .callback = sysrq_handler -}; - -static int setup_shutdown_watcher(void) -{ - int err; - - err = register_xenbus_watch(&shutdown_watch); - if (err) { - printk(KERN_ERR "Failed to set shutdown watcher\n"); - return err; - } - - err = register_xenbus_watch(&sysrq_watch); - if (err) { - printk(KERN_ERR "Failed to set sysrq watcher\n"); - return err; - } - - return 0; -} - -static int shutdown_event(struct notifier_block *notifier, - unsigned long event, - void *data) -{ - setup_shutdown_watcher(); - return NOTIFY_DONE; -} - -static int __init setup_shutdown_event(void) -{ - static struct notifier_block xenstore_notifier = { - .notifier_call = shutdown_event - }; - register_xenstore_notifier(&xenstore_notifier); - - return 0; -} - -subsys_initcall(setup_shutdown_event); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index df40bf74ea75..42b3b9ed641d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -56,6 +56,131 @@ #include "multicalls.h" #include "mmu.h" +#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) +#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) + +/* Placeholder for holes in the address space */ +static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] + __attribute__((section(".data.page_aligned"))) = + { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; + + /* Array of pointers to pages containing p2m entries */ +static unsigned long *p2m_top[TOP_ENTRIES] + __attribute__((section(".data.page_aligned"))) = + { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; + +/* Arrays of p2m arrays expressed in mfns used for save/restore */ +static unsigned long p2m_top_mfn[TOP_ENTRIES] + __attribute__((section(".bss.page_aligned"))); + +static unsigned long p2m_top_mfn_list[ + PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] + __attribute__((section(".bss.page_aligned"))); + +static inline unsigned p2m_top_index(unsigned long pfn) +{ + BUG_ON(pfn >= MAX_DOMAIN_PAGES); + return pfn / P2M_ENTRIES_PER_PAGE; +} + +static inline unsigned p2m_index(unsigned long pfn) +{ + return pfn % P2M_ENTRIES_PER_PAGE; +} + +/* Build the parallel p2m_top_mfn structures */ +void xen_setup_mfn_list_list(void) +{ + unsigned pfn, idx; + + for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + + p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); + } + + for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { + unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; + p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); + } + + BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); + + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + virt_to_mfn(p2m_top_mfn_list); + HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; +} + +/* Set up p2m_top to point to the domain-builder provided p2m pages */ +void __init xen_build_dynamic_phys_to_machine(void) +{ + unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; + unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); + unsigned pfn; + + for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + + p2m_top[topidx] = &mfn_list[pfn]; + } +} + +unsigned long get_phys_to_machine(unsigned long pfn) +{ + unsigned topidx, idx; + + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) + return INVALID_P2M_ENTRY; + + topidx = p2m_top_index(pfn); + idx = p2m_index(pfn); + return p2m_top[topidx][idx]; +} +EXPORT_SYMBOL_GPL(get_phys_to_machine); + +static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) +{ + unsigned long *p; + unsigned i; + + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) + p[i] = INVALID_P2M_ENTRY; + + if (cmpxchg(pp, p2m_missing, p) != p2m_missing) + free_page((unsigned long)p); + else + *mfnp = virt_to_mfn(p); +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { + BUG_ON(mfn != INVALID_P2M_ENTRY); + return; + } + + topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == p2m_missing) { + /* no need to allocate a page to store an invalid entry */ + if (mfn == INVALID_P2M_ENTRY) + return; + alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); + } + + idx = p2m_index(pfn); + p2m_top[topidx][idx] = mfn; +} + xmaddr_t arbitrary_virt_to_machine(unsigned long address) { unsigned int level; @@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr) } -void xen_set_pmd(pmd_t *ptr, pmd_t val) +static bool page_pinned(void *ptr) +{ + struct page *page = virt_to_page(ptr); + + return PagePinned(page); +} + +static void extend_mmu_update(const struct mmu_update *update) { struct multicall_space mcs; struct mmu_update *u; - preempt_disable(); + mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); + + if (mcs.mc != NULL) + mcs.mc->args[1]++; + else { + mcs = __xen_mc_entry(sizeof(*u)); + MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); + } - mcs = xen_mc_entry(sizeof(*u)); u = mcs.args; - u->ptr = virt_to_machine(ptr).maddr; - u->val = pmd_val_ma(val); - MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + *u = *update; +} + +void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) +{ + struct mmu_update u; + + preempt_disable(); + + xen_mc_batch(); + + u.ptr = virt_to_machine(ptr).maddr; + u.val = pmd_val_ma(val); + extend_mmu_update(&u); xen_mc_issue(PARAVIRT_LAZY_MMU); preempt_enable(); } +void xen_set_pmd(pmd_t *ptr, pmd_t val) +{ + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + return; + } + + xen_set_pmd_hyper(ptr, val); +} + /* * Associate a virtual page frame with a given physical page frame * and protection flags for that frame. @@ -179,13 +340,33 @@ out: preempt_enable(); } +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + /* Just return the pte as-is. We preserve the bits on commit */ + return *ptep; +} + +void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + struct mmu_update u; + + xen_mc_batch(); + + u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; + u.val = pte_val_ma(pte); + extend_mmu_update(&u); + + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + /* Assume pteval_t is equivalent to all the other *val_t types. */ static pteval_t pte_mfn_to_pfn(pteval_t val) { if (val & _PAGE_PRESENT) { unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; pteval_t flags = val & ~PTE_MASK; - val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; + val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; } return val; @@ -196,7 +377,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; pteval_t flags = val & ~PTE_MASK; - val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; + val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; } return val; @@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd) return pte_mfn_to_pfn(pmd.pmd); } -void xen_set_pud(pud_t *ptr, pud_t val) +void xen_set_pud_hyper(pud_t *ptr, pud_t val) { - struct multicall_space mcs; - struct mmu_update *u; + struct mmu_update u; preempt_disable(); - mcs = xen_mc_entry(sizeof(*u)); - u = mcs.args; - u->ptr = virt_to_machine(ptr).maddr; - u->val = pud_val_ma(val); - MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + xen_mc_batch(); + + u.ptr = virt_to_machine(ptr).maddr; + u.val = pud_val_ma(val); + extend_mmu_update(&u); xen_mc_issue(PARAVIRT_LAZY_MMU); preempt_enable(); } +void xen_set_pud(pud_t *ptr, pud_t val) +{ + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + return; + } + + xen_set_pud_hyper(ptr, val); +} + void xen_set_pte(pte_t *ptep, pte_t pte) { ptep->pte_high = pte.pte_high; @@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) void xen_pmd_clear(pmd_t *pmdp) { - xen_set_pmd(pmdp, __pmd(0)); + set_pmd(pmdp, __pmd(0)); } pmd_t xen_make_pmd(pmdval_t pmd) @@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On save, we need to pin all pagetables to make sure they get their + * mfns turned into pfns. Search the list for any unpinned pgds and pin + * them (unpinned pgds are not currently in use, probably because the + * process is under construction or destruction). + */ +void xen_mm_pin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) { + xen_pgd_pin((pgd_t *)page_address(page)); + SetPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + /* The init_mm pagetable is really pinned as soon as its created, but that's before we have page structures to store the bits. So do all the book-keeping now. */ @@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On resume, undo any pinning done at save, so that the rest of the + * kernel doesn't see any unexpected pinned pagetables. + */ +void xen_mm_unpin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (PageSavePinned(page)) { + BUG_ON(!PagePinned(page)); + printk("unpinning pinned %p\n", page_address(page)); + xen_pgd_unpin((pgd_t *)page_address(page)); + ClearPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) { spin_lock(&next->page_table_lock); @@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm) spin_lock(&mm->page_table_lock); /* pgd may not be pinned in the error exit path of execve */ - if (PagePinned(virt_to_page(mm->pgd))) + if (page_pinned(mm->pgd)) xen_pgd_unpin(mm->pgd); spin_unlock(&mm->page_table_lock); diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 5fe961caffd4..297bf9f5b8bc 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -25,10 +25,6 @@ enum pt_level { void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -void xen_set_pte(pte_t *ptep, pte_t pteval); -void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); -void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); @@ -45,11 +41,19 @@ pte_t xen_make_pte(pteval_t); pmd_t xen_make_pmd(pmdval_t); pgd_t xen_make_pgd(pgdval_t); +void xen_set_pte(pte_t *ptep, pte_t pteval); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void xen_set_pte_atomic(pte_t *ptep, pte_t pte); +void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_set_pud(pud_t *ptr, pud_t val); +void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); +void xen_set_pud_hyper(pud_t *ptr, pud_t val); void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_pmd_clear(pmd_t *pmdp); +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 5791eb2e3750..3c63c4da7ed1 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -29,14 +29,14 @@ #define MC_DEBUG 1 #define MC_BATCH 32 -#define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) +#define MC_ARGS (MC_BATCH * 16) struct mc_buffer { struct multicall_entry entries[MC_BATCH]; #if MC_DEBUG struct multicall_entry debug[MC_BATCH]; #endif - u64 args[MC_ARGS]; + unsigned char args[MC_ARGS]; struct callback { void (*fn)(void *); void *data; @@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args) { struct mc_buffer *b = &__get_cpu_var(mc_buffer); struct multicall_space ret; - unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); + unsigned argidx = roundup(b->argidx, sizeof(u64)); BUG_ON(preemptible()); - BUG_ON(argspace > MC_ARGS); + BUG_ON(b->argidx > MC_ARGS); if (b->mcidx == MC_BATCH || - (b->argidx + argspace) > MC_ARGS) + (argidx + args) > MC_ARGS) { xen_mc_flush(); + argidx = roundup(b->argidx, sizeof(u64)); + } ret.mc = &b->entries[b->mcidx]; b->mcidx++; + ret.args = &b->args[argidx]; + b->argidx = argidx + args; + + BUG_ON(b->argidx > MC_ARGS); + return ret; +} + +struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) +{ + struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct multicall_space ret = { NULL, NULL }; + + BUG_ON(preemptible()); + BUG_ON(b->argidx > MC_ARGS); + + if (b->mcidx == 0) + return ret; + + if (b->entries[b->mcidx - 1].op != op) + return ret; + + if ((b->argidx + size) > MC_ARGS) + return ret; + + ret.mc = &b->entries[b->mcidx - 1]; ret.args = &b->args[b->argidx]; - b->argidx += argspace; + b->argidx += size; + BUG_ON(b->argidx > MC_ARGS); return ret; } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 8bae996d99a3..858938241616 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode) /* Set up a callback to be called when the current batch is flushed */ void xen_mc_callback(void (*fn)(void *), void *data); +/* + * Try to extend the arguments of the previous multicall command. The + * previous command's op must match. If it does, then it attempts to + * extend the argument space allocated to the multicall entry by + * arg_size bytes. + * + * The returned multicall_space will return with mc pointing to the + * command on success, or NULL on failure, and args pointing to the + * newly allocated space. + */ +struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); + #endif /* _XEN_MULTICALLS_H */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 82517e4a752a..e0a39595bde3 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -13,9 +13,11 @@ #include <asm/vdso.h> #include <asm/e820.h> #include <asm/setup.h> +#include <asm/acpi.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/page.h> #include <xen/interface/callback.h> #include <xen/interface/physdev.h> #include <xen/features.h> @@ -27,8 +29,6 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; -unsigned long *phys_to_machine_mapping; -EXPORT_SYMBOL(phys_to_machine_mapping); /** * machine_specific_memory_setup - Hook for machine specific memory setup. @@ -38,9 +38,31 @@ char * __init xen_memory_setup(void) { unsigned long max_pfn = xen_start_info->nr_pages; + max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); + e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); + + e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM); + + /* + * Even though this is normal, usable memory under Xen, reserve + * ISA memory anyway because too many things think they can poke + * about in there. + */ + e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, + E820_RESERVED); + + /* + * Reserve Xen bits: + * - mfn_list + * - xen_start_info + * See comment above "struct start_info" in <xen/interface/xen.h> + */ + e820_add_region(__pa(xen_start_info->mfn_list), + xen_start_info->pt_base - xen_start_info->mfn_list, + E820_RESERVED); + + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); return "Xen"; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e69000f982..d2e3c20127d7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,7 +35,7 @@ #include "xen-ops.h" #include "mmu.h" -static cpumask_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq) = -1; static DEFINE_PER_CPU(int, callfunc_irq) = -1; static DEFINE_PER_CPU(int, debug_irq) = -1; @@ -65,6 +65,12 @@ static struct call_data_struct *call_data; */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { +#ifdef CONFIG_X86_32 + __get_cpu_var(irq_stat).irq_resched_count++; +#else + add_pda(irq_resched_count, 1); +#endif + return IRQ_HANDLED; } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 000000000000..251669a932d4 --- /dev/null +++ b/arch/x86/xen/suspend.c @@ -0,0 +1,45 @@ +#include <linux/types.h> + +#include <xen/interface/xen.h> +#include <xen/grant_table.h> +#include <xen/events.h> + +#include <asm/xen/hypercall.h> +#include <asm/xen/page.h> + +#include "xen-ops.h" +#include "mmu.h" + +void xen_pre_suspend(void) +{ + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + mfn_to_pfn(xen_start_info->console.domU.mfn); + + BUG_ON(!irqs_disabled()); + + HYPERVISOR_shared_info = &xen_dummy_shared_info; + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + __pte_ma(0), 0)) + BUG(); +} + +void xen_post_suspend(int suspend_cancelled) +{ + xen_setup_shared_info(); + + if (suspend_cancelled) { + xen_start_info->store_mfn = + pfn_to_mfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { +#ifdef CONFIG_SMP + xen_cpu_initialized_map = cpu_online_map; +#endif + xen_vcpu_restore(); + xen_timer_resume(); + } + +} + diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 41e217503c96..685b77470fc3 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void) } -/* Get the CPU speed from Xen */ -unsigned long xen_cpu_khz(void) +/* Get the TSC speed from Xen */ +unsigned long xen_tsc_khz(void) { u64 xen_khz = 1000000ULL << 32; const struct pvclock_vcpu_time_info *info = @@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } +void xen_timer_resume(void) +{ + int cpu; + + if (xen_clockevent != &xen_vcpuop_clockevent) + return; + + for_each_online_cpu(cpu) { + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + BUG(); + } +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 6ec3b4f7719b..7c0cf6320a0a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -7,6 +7,7 @@ #include <linux/init.h> #include <asm/boot.h> #include <xen/interface/elfnote.h> +#include <asm/xen/interface.h> __INIT ENTRY(startup_xen) @@ -32,5 +33,9 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, + .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) #endif /*CONFIG_XEN */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae08037..d852ddbb3448 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -9,26 +9,35 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +struct trap_info; void xen_copy_trap_info(struct trap_info *traps); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); extern struct start_info *xen_start_info; +extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; +void xen_setup_mfn_list_list(void); +void xen_setup_shared_info(void); + char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); void xen_enable_sysenter(void); +void xen_vcpu_restore(void); + +void __init xen_build_dynamic_phys_to_machine(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); -unsigned long xen_cpu_khz(void); +unsigned long xen_tsc_khz(void); void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); +void xen_timer_resume(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); @@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait); +extern cpumask_t xen_cpu_initialized_map; + /* Declare an asm function, along with symbols needed to make it inlineable */ diff --git a/block/as-iosched.c b/block/as-iosched.c index 8c3946787dbb..743f33a01a07 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -831,6 +831,8 @@ static void as_completed_request(struct request_queue *q, struct request *rq) } if (ad->changed_batch && ad->nr_dispatched == 1) { + ad->current_batch_expires = jiffies + + ad->batch_expire[ad->batch_data_dir]; kblockd_schedule_work(&ad->antic_work); ad->changed_batch = 0; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index c52fca833268..860f15f36ce9 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -4,7 +4,6 @@ menuconfig ACPI bool "ACPI (Advanced Configuration and Power Interface) Support" - depends on !X86_NUMAQ depends on !X86_VISWS depends on !IA64_HP_SIM depends on IA64 || X86 diff --git a/drivers/acpi/bay.c b/drivers/acpi/bay.c index 26038c2a2a71..61b6c5beb2d3 100644 --- a/drivers/acpi/bay.c +++ b/drivers/acpi/bay.c @@ -377,6 +377,9 @@ static int __init bay_init(void) INIT_LIST_HEAD(&drive_bays); + if (acpi_disabled) + return -ENODEV; + /* look for dockable drive bays */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, find_bay, &bays, NULL); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 96c542f7fded..bb7c51f712bd 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -917,6 +917,9 @@ static int __init dock_init(void) dock_station = NULL; + if (acpi_disabled) + return 0; + /* look for a dock station */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, find_dock, &num, NULL); diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 2808dc60fd67..9b227d4dc9c9 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -333,6 +333,9 @@ static int __init acpi_rtc_init(void) { struct device *dev = get_rtc_dev(); + if (acpi_disabled) + return 0; + if (dev) { rtc_wake_setup(); rtc_info.wake_on = rtc_wake_on; diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index c3b0cd88d09f..495c63a3e0af 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -36,9 +36,8 @@ static int acpi_sleep_prepare(u32 acpi_state) if (!acpi_wakeup_address) { return -EFAULT; } - acpi_set_firmware_waking_vector((acpi_physical_address) - virt_to_phys((void *) - acpi_wakeup_address)); + acpi_set_firmware_waking_vector( + (acpi_physical_address)acpi_wakeup_address); } ACPI_FLUSH_CPU_CACHE(); diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c index 224c57c03381..4ebbba2b6b19 100644 --- a/drivers/acpi/sleep/proc.c +++ b/drivers/acpi/sleep/proc.c @@ -315,8 +315,11 @@ acpi_system_write_alarm(struct file *file, cmos_bcd_write(day, acpi_gbl_FADT.day_alarm, rtc_control); if (acpi_gbl_FADT.month_alarm) cmos_bcd_write(mo, acpi_gbl_FADT.month_alarm, rtc_control); - if (acpi_gbl_FADT.century) + if (acpi_gbl_FADT.century) { + if (adjust) + yr += cmos_bcd_read(acpi_gbl_FADT.century, rtc_control) * 100; cmos_bcd_write(yr / 100, acpi_gbl_FADT.century, rtc_control); + } /* enable the rtc alarm interrupt */ rtc_control |= RTC_AIE; CMOS_WRITE(rtc_control, RTC_CONTROL); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 6a4a2a25d97a..5e6468a7ca4b 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1777,7 +1777,7 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance) struct ahci_host_priv *hpriv; unsigned int i, handled = 0; void __iomem *mmio; - u32 irq_stat, irq_ack = 0; + u32 irq_stat, irq_masked; VPRINTK("ENTER\n"); @@ -1786,16 +1786,17 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance) /* sigh. 0xffffffff is a valid return from h/w */ irq_stat = readl(mmio + HOST_IRQ_STAT); - irq_stat &= hpriv->port_map; if (!irq_stat) return IRQ_NONE; + irq_masked = irq_stat & hpriv->port_map; + spin_lock(&host->lock); for (i = 0; i < host->n_ports; i++) { struct ata_port *ap; - if (!(irq_stat & (1 << i))) + if (!(irq_masked & (1 << i))) continue; ap = host->ports[i]; @@ -1809,14 +1810,20 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance) "interrupt on disabled port %u\n", i); } - irq_ack |= (1 << i); - } - - if (irq_ack) { - writel(irq_ack, mmio + HOST_IRQ_STAT); handled = 1; } + /* HOST_IRQ_STAT behaves as level triggered latch meaning that + * it should be cleared after all the port events are cleared; + * otherwise, it will raise a spurious interrupt after each + * valid one. Please read section 10.6.2 of ahci 1.1 for more + * information. + * + * Also, use the unmasked value to clear interrupt as spurious + * pending event on a dummy port might cause screaming IRQ. + */ + writel(irq_stat, mmio + HOST_IRQ_STAT); + spin_unlock(&host->lock); VPRINTK("EXIT\n"); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 215d18672a5a..c0908c225483 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1094,6 +1094,7 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq) { + struct ata_eh_info *ehi = &ap->link.eh_info; unsigned long flags = 0; int poll_next; @@ -1125,9 +1126,12 @@ fsm_start: if (likely(status & (ATA_ERR | ATA_DF))) /* device stops HSM for abort/error */ qc->err_mask |= AC_ERR_DEV; - else + else { /* HSM violation. Let EH handle this */ + ata_ehi_push_desc(ehi, + "ST_FIRST: !(DRQ|ERR|DF)"); qc->err_mask |= AC_ERR_HSM; + } ap->hsm_task_state = HSM_ST_ERR; goto fsm_start; @@ -1146,9 +1150,9 @@ fsm_start: * the CDB. */ if (!(qc->dev->horkage & ATA_HORKAGE_STUCK_ERR)) { - ata_port_printk(ap, KERN_WARNING, - "DRQ=1 with device error, " - "dev_stat 0x%X\n", status); + ata_ehi_push_desc(ehi, "ST_FIRST: " + "DRQ=1 with device error, " + "dev_stat 0x%X", status); qc->err_mask |= AC_ERR_HSM; ap->hsm_task_state = HSM_ST_ERR; goto fsm_start; @@ -1205,9 +1209,9 @@ fsm_start: * let the EH abort the command or reset the device. */ if (unlikely(status & (ATA_ERR | ATA_DF))) { - ata_port_printk(ap, KERN_WARNING, "DRQ=1 with " - "device error, dev_stat 0x%X\n", - status); + ata_ehi_push_desc(ehi, "ST-ATAPI: " + "DRQ=1 with device error, " + "dev_stat 0x%X", status); qc->err_mask |= AC_ERR_HSM; ap->hsm_task_state = HSM_ST_ERR; goto fsm_start; @@ -1226,13 +1230,17 @@ fsm_start: if (likely(status & (ATA_ERR | ATA_DF))) /* device stops HSM for abort/error */ qc->err_mask |= AC_ERR_DEV; - else + else { /* HSM violation. Let EH handle this. * Phantom devices also trigger this * condition. Mark hint. */ + ata_ehi_push_desc(ehi, "ST-ATA: " + "DRQ=1 with device error, " + "dev_stat 0x%X", status); qc->err_mask |= AC_ERR_HSM | AC_ERR_NODEV_HINT; + } ap->hsm_task_state = HSM_ST_ERR; goto fsm_start; @@ -1257,8 +1265,12 @@ fsm_start: status = ata_wait_idle(ap); } - if (status & (ATA_BUSY | ATA_DRQ)) + if (status & (ATA_BUSY | ATA_DRQ)) { + ata_ehi_push_desc(ehi, "ST-ATA: " + "BUSY|DRQ persists on ERR|DF, " + "dev_stat 0x%X", status); qc->err_mask |= AC_ERR_HSM; + } /* ata_pio_sectors() might change the * state to HSM_ST_LAST. so, the state diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 28092bc50146..ad169ffbc4cb 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1607,7 +1607,7 @@ static unsigned int mv_qc_issue(struct ata_queued_cmd *qc) * Much of the time, this could just work regardless. * So for now, just log the incident, and allow the attempt. */ - if (limit_warnings && (qc->nbytes / qc->sect_size) > 1) { + if (limit_warnings > 0 && (qc->nbytes / qc->sect_size) > 1) { --limit_warnings; ata_link_printk(qc->dev->link, KERN_WARNING, DRV_NAME ": attempting PIO w/multiple DRQ: " diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 8ee6b5b4ede7..84ffcc26a74b 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -370,6 +370,7 @@ static const struct pci_device_id sil24_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3124), BID_SIL3124 }, { PCI_VDEVICE(CMD, 0x3132), BID_SIL3132 }, { PCI_VDEVICE(CMD, 0x0242), BID_SIL3132 }, + { PCI_VDEVICE(CMD, 0x0244), BID_SIL3132 }, { PCI_VDEVICE(CMD, 0x3131), BID_SIL3131 }, { PCI_VDEVICE(CMD, 0x3531), BID_SIL3131 }, diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c index f277cea904ce..db529b849948 100644 --- a/drivers/ata/sata_uli.c +++ b/drivers/ata/sata_uli.c @@ -83,6 +83,7 @@ static struct ata_port_operations uli_ops = { .inherits = &ata_bmdma_port_ops, .scr_read = uli_scr_read, .scr_write = uli_scr_write, + .hardreset = ATA_OP_NULL, }; static const struct ata_port_info uli_port_info = { diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 043353bd0600..14b9d5f4c203 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -64,7 +64,7 @@ config KS0108_DELAY Amount of time the ks0108 should wait between each control write to the parallel port. - If your driver seems to miss random writings, increment this. + If your LCD seems to miss random writings, increment this. If you don't know what I'm talking about, ignore it. diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c index 80bb06105387..683509f013ab 100644 --- a/drivers/auxdisplay/cfag12864b.c +++ b/drivers/auxdisplay/cfag12864b.c @@ -5,7 +5,7 @@ * License: GPLv2 * Depends: ks0108 * - * Author: Copyright (C) Miguel Ojeda Sandonis <[email protected]> + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-31 * * This program is free software; you can redistribute it and/or modify @@ -398,5 +398,5 @@ module_init(cfag12864b_init); module_exit(cfag12864b_exit); MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Miguel Ojeda Sandonis <[email protected]>"); +MODULE_AUTHOR("Miguel Ojeda Sandonis <[email protected]>"); MODULE_DESCRIPTION("cfag12864b LCD driver"); diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c index 307c190699e0..fe3a865be4e5 100644 --- a/drivers/auxdisplay/cfag12864bfb.c +++ b/drivers/auxdisplay/cfag12864bfb.c @@ -5,7 +5,7 @@ * License: GPLv2 * Depends: cfag12864b * - * Author: Copyright (C) Miguel Ojeda Sandonis <[email protected]> + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-31 * * This program is free software; you can redistribute it and/or modify @@ -186,5 +186,5 @@ module_init(cfag12864bfb_init); module_exit(cfag12864bfb_exit); MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Miguel Ojeda Sandonis <[email protected]>"); +MODULE_AUTHOR("Miguel Ojeda Sandonis <[email protected]>"); MODULE_DESCRIPTION("cfag12864b LCD framebuffer driver"); diff --git a/drivers/auxdisplay/ks0108.c b/drivers/auxdisplay/ks0108.c index e6c3646ef18c..5b93852392b8 100644 --- a/drivers/auxdisplay/ks0108.c +++ b/drivers/auxdisplay/ks0108.c @@ -5,7 +5,7 @@ * License: GPLv2 * Depends: parport * - * Author: Copyright (C) Miguel Ojeda Sandonis <[email protected]> + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-31 * * This program is free software; you can redistribute it and/or modify @@ -173,6 +173,6 @@ module_init(ks0108_init); module_exit(ks0108_exit); MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Miguel Ojeda Sandonis <[email protected]>"); +MODULE_AUTHOR("Miguel Ojeda Sandonis <[email protected]>"); MODULE_DESCRIPTION("ks0108 LCD Controller driver"); diff --git a/drivers/base/node.c b/drivers/base/node.c index 39f3d1b3a213..0f867a083338 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -84,8 +84,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), - nid, node_page_state(nid, NR_ACTIVE), - nid, node_page_state(nid, NR_INACTIVE), + nid, K(node_page_state(nid, NR_ACTIVE)), + nid, K(node_page_state(nid, NR_INACTIVE)), #ifdef CONFIG_HIGHMEM nid, K(i.totalhigh), nid, K(i.freehigh), diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 2b4b392dcbc1..87a7f1d02578 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -153,7 +153,7 @@ EXPORT_SYMBOL(set_trace_device); * it's not any guarantee, but it's a high _likelihood_ that * the match is valid). */ -void generate_resume_trace(void *tracedata, unsigned int user) +void generate_resume_trace(const void *tracedata, unsigned int user) { unsigned short lineno = *(unsigned short *)tracedata; const char *file = *(const char **)(tracedata + 2); diff --git a/drivers/base/topology.c b/drivers/base/topology.c index fdf4044d2e74..1efe162e16d7 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -40,6 +40,7 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \ return sprintf(buf, "%d\n", topology_##name(cpu)); \ } +#if defined(topology_thread_siblings) || defined(topology_core_siblings) static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) { ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; @@ -54,21 +55,41 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) } return n; } +#endif +#ifdef arch_provides_topology_pointers #define define_siblings_show_map(name) \ -static inline ssize_t show_##name(struct sys_device *dev, char *buf) \ +static ssize_t show_##name(struct sys_device *dev, char *buf) \ { \ unsigned int cpu = dev->id; \ return show_cpumap(0, &(topology_##name(cpu)), buf); \ } #define define_siblings_show_list(name) \ -static inline ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ +static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ { \ unsigned int cpu = dev->id; \ return show_cpumap(1, &(topology_##name(cpu)), buf); \ } +#else +#define define_siblings_show_map(name) \ +static ssize_t show_##name(struct sys_device *dev, char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + cpumask_t mask = topology_##name(cpu); \ + return show_cpumap(0, &mask, buf); \ +} + +#define define_siblings_show_list(name) \ +static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + cpumask_t mask = topology_##name(cpu); \ + return show_cpumap(1, &mask, buf); \ +} +#endif + #define define_siblings_show_func(name) \ define_siblings_show_map(name); define_siblings_show_list(name) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 5f1e1cc6165a..d81632cd7d06 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -106,35 +106,34 @@ MODULE_DEVICE_TABLE(pci, cciss_pci_device_id); /* board_id = Subsystem Device ID & Vendor ID * product = Marketing Name for the board * access = Address of the struct of function pointers - * nr_cmds = Number of commands supported by controller */ static struct board_type products[] = { - {0x40700E11, "Smart Array 5300", &SA5_access, 512}, - {0x40800E11, "Smart Array 5i", &SA5B_access, 512}, - {0x40820E11, "Smart Array 532", &SA5B_access, 512}, - {0x40830E11, "Smart Array 5312", &SA5B_access, 512}, - {0x409A0E11, "Smart Array 641", &SA5_access, 512}, - {0x409B0E11, "Smart Array 642", &SA5_access, 512}, - {0x409C0E11, "Smart Array 6400", &SA5_access, 512}, - {0x409D0E11, "Smart Array 6400 EM", &SA5_access, 512}, - {0x40910E11, "Smart Array 6i", &SA5_access, 512}, - {0x3225103C, "Smart Array P600", &SA5_access, 512}, - {0x3223103C, "Smart Array P800", &SA5_access, 512}, - {0x3234103C, "Smart Array P400", &SA5_access, 512}, - {0x3235103C, "Smart Array P400i", &SA5_access, 512}, - {0x3211103C, "Smart Array E200i", &SA5_access, 120}, - {0x3212103C, "Smart Array E200", &SA5_access, 120}, - {0x3213103C, "Smart Array E200i", &SA5_access, 120}, - {0x3214103C, "Smart Array E200i", &SA5_access, 120}, - {0x3215103C, "Smart Array E200i", &SA5_access, 120}, - {0x3237103C, "Smart Array E500", &SA5_access, 512}, - {0x323D103C, "Smart Array P700m", &SA5_access, 512}, - {0x3241103C, "Smart Array P212", &SA5_access, 384}, - {0x3243103C, "Smart Array P410", &SA5_access, 384}, - {0x3245103C, "Smart Array P410i", &SA5_access, 384}, - {0x3247103C, "Smart Array P411", &SA5_access, 384}, - {0x3249103C, "Smart Array P812", &SA5_access, 384}, - {0xFFFF103C, "Unknown Smart Array", &SA5_access, 120}, + {0x40700E11, "Smart Array 5300", &SA5_access}, + {0x40800E11, "Smart Array 5i", &SA5B_access}, + {0x40820E11, "Smart Array 532", &SA5B_access}, + {0x40830E11, "Smart Array 5312", &SA5B_access}, + {0x409A0E11, "Smart Array 641", &SA5_access}, + {0x409B0E11, "Smart Array 642", &SA5_access}, + {0x409C0E11, "Smart Array 6400", &SA5_access}, + {0x409D0E11, "Smart Array 6400 EM", &SA5_access}, + {0x40910E11, "Smart Array 6i", &SA5_access}, + {0x3225103C, "Smart Array P600", &SA5_access}, + {0x3223103C, "Smart Array P800", &SA5_access}, + {0x3234103C, "Smart Array P400", &SA5_access}, + {0x3235103C, "Smart Array P400i", &SA5_access}, + {0x3211103C, "Smart Array E200i", &SA5_access}, + {0x3212103C, "Smart Array E200", &SA5_access}, + {0x3213103C, "Smart Array E200i", &SA5_access}, + {0x3214103C, "Smart Array E200i", &SA5_access}, + {0x3215103C, "Smart Array E200i", &SA5_access}, + {0x3237103C, "Smart Array E500", &SA5_access}, + {0x323D103C, "Smart Array P700m", &SA5_access}, + {0x3241103C, "Smart Array P212", &SA5_access}, + {0x3243103C, "Smart Array P410", &SA5_access}, + {0x3245103C, "Smart Array P410i", &SA5_access}, + {0x3247103C, "Smart Array P411", &SA5_access}, + {0x3249103C, "Smart Array P812", &SA5_access}, + {0xFFFF103C, "Unknown Smart Array", &SA5_access}, }; /* How long to wait (in milliseconds) for board to go into simple mode */ @@ -3086,11 +3085,20 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) print_cfg_table(c->cfgtable); #endif /* CCISS_DEBUG */ + /* Some controllers support Zero Memory Raid (ZMR). + * When configured in ZMR mode the number of supported + * commands drops to 64. So instead of just setting an + * arbitrary value we make the driver a little smarter. + * We read the config table to tell us how many commands + * are supported on the controller then subtract 4 to + * leave a little room for ioctl calls. + */ + c->max_commands = readl(&(c->cfgtable->CmdsOutMax)); for (i = 0; i < ARRAY_SIZE(products); i++) { if (board_id == products[i].board_id) { c->product_name = products[i].product_name; c->access = *(products[i].access); - c->nr_cmds = products[i].nr_cmds; + c->nr_cmds = c->max_commands - 4; break; } } @@ -3110,7 +3118,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) if (subsystem_vendor_id == PCI_VENDOR_ID_HP) { c->product_name = products[i-1].product_name; c->access = *(products[i-1].access); - c->nr_cmds = products[i-1].nr_cmds; + c->nr_cmds = c->max_commands - 4; printk(KERN_WARNING "cciss: This is an unknown " "Smart Array controller.\n" "cciss: Please update to the latest driver " @@ -3546,6 +3554,10 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, for (j = 0; j <= hba[i]->highest_lun; j++) add_disk(hba[i]->gendisk[j]); + /* we must register the controller even if no disks exist */ + if (hba[i]->highest_lun == -1) + add_disk(hba[i]->gendisk[0]); + return 1; clean4: diff --git a/drivers/char/drm/i915_irq.c b/drivers/char/drm/i915_irq.c index f7f16e7a8bf3..df036118b8b1 100644 --- a/drivers/char/drm/i915_irq.c +++ b/drivers/char/drm/i915_irq.c @@ -62,11 +62,11 @@ static void i915_vblank_tasklet(struct drm_device *dev) u32 ropcpp = (0xcc << 16) | ((cpp - 1) << 24); RING_LOCALS; - if (sarea_priv->front_tiled) { + if (IS_I965G(dev) && sarea_priv->front_tiled) { cmd |= XY_SRC_COPY_BLT_DST_TILED; dst_pitch >>= 2; } - if (sarea_priv->back_tiled) { + if (IS_I965G(dev) && sarea_priv->back_tiled) { cmd |= XY_SRC_COPY_BLT_SRC_TILED; src_pitch >>= 2; } diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index dd68f8541c2d..db2ae4216279 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -39,9 +39,14 @@ static int xencons_irq; /* ------------------------------------------------------------------ */ +static unsigned long console_pfn = ~0ul; + static inline struct xencons_interface *xencons_interface(void) { - return mfn_to_virt(xen_start_info->console.domU.mfn); + if (console_pfn == ~0ul) + return mfn_to_virt(xen_start_info->console.domU.mfn); + else + return __va(console_pfn << PAGE_SHIFT); } static inline void notify_daemon(void) @@ -101,20 +106,32 @@ static int __init xen_init(void) { struct hvc_struct *hp; - if (!is_running_on_xen()) - return 0; + if (!is_running_on_xen() || + is_initial_xendomain() || + !xen_start_info->console.domU.evtchn) + return -ENODEV; xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); if (xencons_irq < 0) - xencons_irq = 0 /* NO_IRQ */; + xencons_irq = 0; /* NO_IRQ */ + hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); if (IS_ERR(hp)) return PTR_ERR(hp); hvc = hp; + + console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn); + return 0; } +void xen_console_resume(void) +{ + if (xencons_irq) + rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); +} + static void __exit xen_fini(void) { if (hvc) @@ -134,12 +151,28 @@ module_init(xen_init); module_exit(xen_fini); console_initcall(xen_cons_init); +static void raw_console_write(const char *str, int len) +{ + while(len > 0) { + int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); + if (rc <= 0) + break; + + str += rc; + len -= rc; + } +} + +#ifdef CONFIG_EARLY_PRINTK static void xenboot_write_console(struct console *console, const char *string, unsigned len) { unsigned int linelen, off = 0; const char *pos; + raw_console_write(string, len); + + write_console(0, "(early) ", 8); while (off < len && NULL != (pos = strchr(string+off, '\n'))) { linelen = pos-string+off; if (off + linelen > len) @@ -155,5 +188,23 @@ static void xenboot_write_console(struct console *console, const char *string, struct console xenboot_console = { .name = "xenboot", .write = xenboot_write_console, - .flags = CON_PRINTBUFFER | CON_BOOT, + .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME, }; +#endif /* CONFIG_EARLY_PRINTK */ + +void xen_raw_console_write(const char *str) +{ + raw_console_write(str, strlen(str)); +} + +void xen_raw_printk(const char *fmt, ...) +{ + static char buf[512]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + xen_raw_console_write(buf); +} diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index e94bee032314..750131010af0 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -3322,7 +3322,7 @@ static int send_break(struct tty_struct *tty, unsigned int duration) msleep_interruptible(duration); tty->ops->break_ctl(tty, 0); tty_write_unlock(tty); - if (!signal_pending(current)) + if (signal_pending(current)) return -EINTR; return 0; } diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 85e2ba7fcfba..bf4830082a13 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -27,6 +27,8 @@ #include <linux/moduleparam.h> #include <linux/connector.h> #include <linux/mutex.h> +#include <linux/proc_fs.h> +#include <linux/spinlock.h> #include <net/sock.h> @@ -403,6 +405,40 @@ static void cn_callback(void *data) mutex_unlock(¬ify_lock); } +static int cn_proc_show(struct seq_file *m, void *v) +{ + struct cn_queue_dev *dev = cdev.cbdev; + struct cn_callback_entry *cbq; + + seq_printf(m, "Name ID\n"); + + spin_lock_bh(&dev->queue_lock); + + list_for_each_entry(cbq, &dev->queue_list, callback_entry) { + seq_printf(m, "%-15s %u:%u\n", + cbq->id.name, + cbq->id.id.idx, + cbq->id.id.val); + } + + spin_unlock_bh(&dev->queue_lock); + + return 0; +} + +static int cn_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cn_proc_show, NULL); +} + +static const struct file_operations cn_file_ops = { + .owner = THIS_MODULE, + .open = cn_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + static int __devinit cn_init(void) { struct cn_dev *dev = &cdev; @@ -434,6 +470,8 @@ static int __devinit cn_init(void) return -EINVAL; } + proc_net_fops_create(&init_net, "connector", S_IRUGO, &cn_file_ops); + return 0; } @@ -443,6 +481,8 @@ static void __devexit cn_fini(void) cn_already_initialized = 0; + proc_net_remove(&init_net, "connector"); + cn_del_callback(&dev->id); cn_queue_free_dev(dev->cbdev); netlink_kernel_release(dev->nls); diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index b2458bb8e9ca..227d2e036cd8 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -1051,7 +1051,8 @@ static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory, break; case SBP2_CSR_LOGICAL_UNIT_DIRECTORY: - if (sbp2_scan_logical_unit_dir(tgt, ci.p + value) < 0) + /* Adjust for the increment in the iterator */ + if (sbp2_scan_logical_unit_dir(tgt, ci.p - 1 + value) < 0) return -ENOMEM; break; } diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index dc2cec6127d1..ebb9e51deb0c 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -26,6 +26,16 @@ config EDD_OFF kernel. Say N if you want EDD enabled by default. EDD can be dynamically set using the kernel parameter 'edd={on|skipmbr|off}'. +config FIRMWARE_MEMMAP + bool "Add firmware-provided memory map to sysfs" if EMBEDDED + default (X86_64 || X86_32) + help + Add the firmware-provided (unmodified) memory map to /sys/firmware/memmap. + That memory map is used for example by kexec to set up parameter area + for the next kernel, but can also be used for debugging purposes. + + See also Documentation/ABI/testing/sysfs-firmware-memmap. + config EFI_VARS tristate "EFI Variable Support via sysfs" depends on EFI diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 4c9147154df8..1c3c17343dbe 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_DCDBAS) += dcdbas.o obj-$(CONFIG_DMIID) += dmi-id.o obj-$(CONFIG_ISCSI_IBFT_FIND) += iscsi_ibft_find.o obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o +obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index c5e3ed7e903b..455575be3560 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -8,6 +8,11 @@ #include <linux/slab.h> #include <asm/dmi.h> +/* + * DMI stands for "Desktop Management Interface". It is part + * of and an antecedent to, SMBIOS, which stands for System + * Management BIOS. See further: http://www.dmtf.org/standards + */ static char dmi_empty_string[] = " "; static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c new file mode 100644 index 000000000000..e23399c7f773 --- /dev/null +++ b/drivers/firmware/memmap.c @@ -0,0 +1,205 @@ +/* + * linux/drivers/firmware/memmap.c + * Copyright (C) 2008 SUSE LINUX Products GmbH + * by Bernhard Walle <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License v2.0 as published by + * the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/string.h> +#include <linux/firmware-map.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/bootmem.h> + +/* + * Data types ------------------------------------------------------------------ + */ + +/* + * Firmware map entry. Because firmware memory maps are flat and not + * hierarchical, it's ok to organise them in a linked list. No parent + * information is necessary as for the resource tree. + */ +struct firmware_map_entry { + resource_size_t start; /* start of the memory range */ + resource_size_t end; /* end of the memory range (incl.) */ + const char *type; /* type of the memory range */ + struct list_head list; /* entry for the linked list */ + struct kobject kobj; /* kobject for each entry */ +}; + +/* + * Forward declarations -------------------------------------------------------- + */ +static ssize_t memmap_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf); +static ssize_t start_show(struct firmware_map_entry *entry, char *buf); +static ssize_t end_show(struct firmware_map_entry *entry, char *buf); +static ssize_t type_show(struct firmware_map_entry *entry, char *buf); + +/* + * Static data ----------------------------------------------------------------- + */ + +struct memmap_attribute { + struct attribute attr; + ssize_t (*show)(struct firmware_map_entry *entry, char *buf); +}; + +struct memmap_attribute memmap_start_attr = __ATTR_RO(start); +struct memmap_attribute memmap_end_attr = __ATTR_RO(end); +struct memmap_attribute memmap_type_attr = __ATTR_RO(type); + +/* + * These are default attributes that are added for every memmap entry. + */ +static struct attribute *def_attrs[] = { + &memmap_start_attr.attr, + &memmap_end_attr.attr, + &memmap_type_attr.attr, + NULL +}; + +static struct sysfs_ops memmap_attr_ops = { + .show = memmap_attr_show, +}; + +static struct kobj_type memmap_ktype = { + .sysfs_ops = &memmap_attr_ops, + .default_attrs = def_attrs, +}; + +/* + * Registration functions ------------------------------------------------------ + */ + +/* + * Firmware memory map entries + */ +static LIST_HEAD(map_entries); + +/** + * Common implementation of firmware_map_add() and firmware_map_add_early() + * which expects a pre-allocated struct firmware_map_entry. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * @entry: Pre-allocated (either kmalloc() or bootmem allocator), uninitialised + * entry. + */ +static int firmware_map_add_entry(resource_size_t start, resource_size_t end, + const char *type, + struct firmware_map_entry *entry) +{ + BUG_ON(start > end); + + entry->start = start; + entry->end = end; + entry->type = type; + INIT_LIST_HEAD(&entry->list); + kobject_init(&entry->kobj, &memmap_ktype); + + list_add_tail(&entry->list, &map_entries); + + return 0; +} + +/* + * See <linux/firmware-map.h> for documentation. + */ +int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type) +{ + struct firmware_map_entry *entry; + + entry = kmalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); + WARN_ON(!entry); + if (!entry) + return -ENOMEM; + + return firmware_map_add_entry(start, end, type, entry); +} + +/* + * See <linux/firmware-map.h> for documentation. + */ +int __init firmware_map_add_early(resource_size_t start, resource_size_t end, + const char *type) +{ + struct firmware_map_entry *entry; + + entry = alloc_bootmem_low(sizeof(struct firmware_map_entry)); + WARN_ON(!entry); + if (!entry) + return -ENOMEM; + + return firmware_map_add_entry(start, end, type, entry); +} + +/* + * Sysfs functions ------------------------------------------------------------- + */ + +static ssize_t start_show(struct firmware_map_entry *entry, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%llx\n", entry->start); +} + +static ssize_t end_show(struct firmware_map_entry *entry, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%llx\n", entry->end); +} + +static ssize_t type_show(struct firmware_map_entry *entry, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", entry->type); +} + +#define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr) +#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj) + +static ssize_t memmap_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct firmware_map_entry *entry = to_memmap_entry(kobj); + struct memmap_attribute *memmap_attr = to_memmap_attr(attr); + + return memmap_attr->show(entry, buf); +} + +/* + * Initialises stuff and adds the entries in the map_entries list to + * sysfs. Important is that firmware_map_add() and firmware_map_add_early() + * must be called before late_initcall. + */ +static int __init memmap_init(void) +{ + int i = 0; + struct firmware_map_entry *entry; + struct kset *memmap_kset; + + memmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj); + WARN_ON(!memmap_kset); + if (!memmap_kset) + return -ENOMEM; + + list_for_each_entry(entry, &map_entries, list) { + entry->kobj.kset = memmap_kset; + kobject_add(&entry->kobj, NULL, "%d", i++); + } + + return 0; +} +late_initcall(memmap_init); + diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index bbd28342e771..008c38ba774f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -28,12 +28,18 @@ config DEBUG_GPIO comment "I2C GPIO expanders:" config GPIO_PCA953X - tristate "PCA953x I/O ports" + tristate "PCA953x, PCA955x, and MAX7310 I/O ports" depends on I2C help - Say yes here to support the PCA9534 (8-bit), PCA9535 (16-bit), - PCA9536 (4-bit), PCA9537 (4-bit), PCA9538 (8-bit), and PCA9539 - (16-bit) I/O ports. These parts are made by NXP and TI. + Say yes here to provide access to several register-oriented + SMBus I/O expanders, made mostly by NXP or TI. Compatible + models include: + + 4 bits: pca9536, pca9537 + + 8 bits: max7310, pca9534, pca9538, pca9554, pca9557 + + 16 bits: pca9535, pca9539, pca9555 This driver can also be built as a module. If so, the module will be called pca953x. diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index 7e40e8a55edf..a380730b61ab 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -33,7 +33,7 @@ static const struct i2c_device_id pca953x_id[] = { { "pca9554", 8, }, { "pca9555", 16, }, { "pca9557", 8, }, - /* REVISIT several pca955x parts should work here too */ + { "max7310", 8, }, { } }; MODULE_DEVICE_TABLE(i2c, pca953x_id); diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c index 26df06f840eb..50f22690d611 100644 --- a/drivers/hwmon/hdaps.c +++ b/drivers/hwmon/hdaps.c @@ -516,17 +516,23 @@ static struct dmi_system_id __initdata hdaps_whitelist[] = { HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R51"), HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R52"), HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61i"), + HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61"), HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p"), HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T41"), HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p"), HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T42"), HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T43"), HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60"), + HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61p"), HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61"), HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad X40"), HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad X41"), HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60"), + HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61s"), + HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61"), HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad Z60m"), + HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61m"), + HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61p"), { .ident = NULL } }; diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 1305ef190fc1..9e8c875437be 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -290,12 +290,12 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat) * bus, or started a new i2c message */ - if (iicstat & S3C2410_IICSTAT_LASTBIT && + if (iicstat & S3C2410_IICSTAT_LASTBIT && !(i2c->msg->flags & I2C_M_IGNORE_NAK)) { /* ack was not received... */ dev_dbg(i2c->dev, "ack was not received\n"); - s3c24xx_i2c_stop(i2c, -EREMOTEIO); + s3c24xx_i2c_stop(i2c, -ENXIO); goto out_ack; } @@ -305,7 +305,7 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat) i2c->state = STATE_WRITE; /* terminate the transfer if there is nothing to do - * (used by the i2c probe to find devices */ + * as this is used by the i2c probe to find devices. */ if (is_lastmsg(i2c) && i2c->msg->len == 0) { s3c24xx_i2c_stop(i2c, 0); @@ -323,7 +323,17 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat) * end of the message, and if so, work out what to do */ + if (!(i2c->msg->flags & I2C_M_IGNORE_NAK)) { + if (iicstat & S3C2410_IICSTAT_LASTBIT) { + dev_dbg(i2c->dev, "WRITE: No Ack\n"); + + s3c24xx_i2c_stop(i2c, -ECONNREFUSED); + goto out_ack; + } + } + retry_write: + if (!is_msgend(i2c)) { byte = i2c->msg->buf[i2c->msg_ptr++]; writeb(byte, i2c->regs + S3C2410_IICDS); @@ -377,17 +387,6 @@ static int i2s_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat) * going to do any more read/write */ - if (!(i2c->msg->flags & I2C_M_IGNORE_NAK) && - !(is_msglast(i2c) && is_lastmsg(i2c))) { - - if (iicstat & S3C2410_IICSTAT_LASTBIT) { - dev_dbg(i2c->dev, "READ: No Ack\n"); - - s3c24xx_i2c_stop(i2c, -ECONNREFUSED); - goto out_ack; - } - } - byte = readb(i2c->regs + S3C2410_IICDS); i2c->msg->buf[i2c->msg_ptr++] = byte; @@ -949,3 +948,4 @@ MODULE_DESCRIPTION("S3C24XX I2C Bus driver"); MODULE_AUTHOR("Ben Dooks, <[email protected]>"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:s3c2410-i2c"); +MODULE_ALIAS("platform:s3c2440-i2c"); diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 8e07de23d220..1607536ff5fb 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -823,6 +823,13 @@ config BLK_DEV_IDE_RAPIDE Say Y here if you want to support the Yellowstone RapIDE controller manufactured for use with Acorn computers. +config BLK_DEV_IDE_BAST + tristate "Simtec BAST / Thorcom VR1000 IDE support" + depends on ARM && (ARCH_BAST || MACH_VR1000) + help + Say Y here if you want to support the onboard IDE channels on the + Simtec BAST or the Thorcom VR1000 + config IDE_H8300 tristate "H8300 IDE support" depends on H8300 diff --git a/drivers/ide/arm/Makefile b/drivers/ide/arm/Makefile index 5bc26053afa6..936e7b0237f5 100644 --- a/drivers/ide/arm/Makefile +++ b/drivers/ide/arm/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_BLK_DEV_IDE_ICSIDE) += icside.o obj-$(CONFIG_BLK_DEV_IDE_RAPIDE) += rapide.o +obj-$(CONFIG_BLK_DEV_IDE_BAST) += bast-ide.o obj-$(CONFIG_BLK_DEV_PALMCHIP_BK3710) += palm_bk3710.o ifeq ($(CONFIG_IDE_ARM), m) diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c new file mode 100644 index 000000000000..8e8c28104b45 --- /dev/null +++ b/drivers/ide/arm/bast-ide.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003-2004 Simtec Electronics + * Ben Dooks <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * +*/ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/ide.h> +#include <linux/init.h> + +#include <asm/mach-types.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/arch/map.h> +#include <asm/arch/bast-map.h> +#include <asm/arch/bast-irq.h> + +#define DRV_NAME "bast-ide" + +static int __init bastide_register(unsigned int base, unsigned int aux, int irq) +{ + ide_hwif_t *hwif; + hw_regs_t hw; + int i; + u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; + + memset(&hw, 0, sizeof(hw)); + + base += BAST_IDE_CS; + aux += BAST_IDE_CS; + + for (i = 0; i <= 7; i++) { + hw.io_ports_array[i] = (unsigned long)base; + base += 0x20; + } + + hw.io_ports.ctl_addr = aux + (6 * 0x20); + hw.irq = irq; + hw.chipset = ide_generic; + + hwif = ide_find_port(); + if (hwif == NULL) + goto out; + + i = hwif->index; + + ide_init_port_data(hwif, i); + ide_init_port_hw(hwif, &hw); + hwif->port_ops = NULL; + + idx[0] = i; + + ide_device_add(idx, NULL); +out: + return 0; +} + +static int __init bastide_init(void) +{ + unsigned long base = BAST_VA_IDEPRI + BAST_IDE_CS; + + /* we can treat the VR1000 and the BAST the same */ + + if (!(machine_is_bast() || machine_is_vr1000())) + return 0; + + printk("BAST: IDE driver, (c) 2003-2004 Simtec Electronics\n"); + + if (!request_mem_region(base, 0x400000, DRV_NAME)) { + printk(KERN_ERR "%s: resources busy\n", DRV_NAME); + return -EBUSY; + } + + bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0); + bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1); + + return 0; +} + +module_init(bastide_init); + +MODULE_AUTHOR("Ben Dooks <[email protected]>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simtec BAST / Thorcom VR1000 IDE driver"); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 380fa0c8cc84..d27061b39324 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -646,8 +646,6 @@ static int ide_register_port(ide_hwif_t *hwif) goto out; } - get_device(&hwif->gendev); - hwif->portdev = device_create_drvdata(ide_port_class, &hwif->gendev, MKDEV(0, 0), hwif, hwif->name); if (IS_ERR(hwif->portdev)) { diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 55ec7f798772..8af88bf0969b 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -76,7 +76,7 @@ static int proc_ide_read_mate ide_hwif_t *hwif = (ide_hwif_t *) data; int len; - if (hwif && hwif->mate && hwif->mate->present) + if (hwif && hwif->mate) len = sprintf(page, "%s\n", hwif->mate->name); else len = sprintf(page, "(none)\n"); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index c758dcb13b14..300431d080a9 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -315,13 +315,14 @@ void ide_unregister(ide_hwif_t *hwif) BUG_ON(in_interrupt()); BUG_ON(irqs_disabled()); + mutex_lock(&ide_cfg_mtx); - spin_lock_irq(&ide_lock); - if (!hwif->present) - goto abort; - __ide_port_unregister_devices(hwif); - hwif->present = 0; + spin_lock_irq(&ide_lock); + if (hwif->present) { + __ide_port_unregister_devices(hwif); + hwif->present = 0; + } spin_unlock_irq(&ide_lock); ide_proc_unregister_port(hwif); @@ -351,16 +352,15 @@ void ide_unregister(ide_hwif_t *hwif) blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS); kfree(hwif->sg_table); unregister_blkdev(hwif->major, hwif->name); - spin_lock_irq(&ide_lock); if (hwif->dma_base) ide_release_dma_engine(hwif); + spin_lock_irq(&ide_lock); /* restore hwif data to pristine status */ ide_init_port_data(hwif, hwif->index); - -abort: spin_unlock_irq(&ide_lock); + mutex_unlock(&ide_cfg_mtx); } @@ -1094,13 +1094,6 @@ struct bus_type ide_bus_type = { EXPORT_SYMBOL_GPL(ide_bus_type); -static void ide_port_class_release(struct device *portdev) -{ - ide_hwif_t *hwif = dev_get_drvdata(portdev); - - put_device(&hwif->gendev); -} - int ide_vlb_clk; EXPORT_SYMBOL_GPL(ide_vlb_clk); @@ -1305,7 +1298,6 @@ static int __init ide_init(void) ret = PTR_ERR(ide_port_class); goto out_port_class; } - ide_port_class->dev_release = ide_port_class_release; init_ide_data(); diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c index eebc72465fc9..72c63e5dd630 100644 --- a/drivers/input/ff-core.c +++ b/drivers/input/ff-core.c @@ -28,6 +28,7 @@ #include <linux/input.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/sched.h> /* * Check that the effect_id is a valid effect and whether the user @@ -166,8 +167,10 @@ int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, if (ret) goto out; + spin_lock_irq(&dev->event_lock); ff->effects[id] = *effect; ff->effect_owners[id] = file; + spin_unlock_irq(&dev->event_lock); out: mutex_unlock(&ff->mutex); @@ -189,16 +192,22 @@ static int erase_effect(struct input_dev *dev, int effect_id, if (error) return error; + spin_lock_irq(&dev->event_lock); ff->playback(dev, effect_id, 0); + ff->effect_owners[effect_id] = NULL; + spin_unlock_irq(&dev->event_lock); if (ff->erase) { error = ff->erase(dev, effect_id); - if (error) + if (error) { + spin_lock_irq(&dev->event_lock); + ff->effect_owners[effect_id] = file; + spin_unlock_irq(&dev->event_lock); + return error; + } } - ff->effect_owners[effect_id] = NULL; - return 0; } @@ -263,8 +272,6 @@ int input_ff_event(struct input_dev *dev, unsigned int type, if (type != EV_FF) return 0; - mutex_lock(&ff->mutex); - switch (code) { case FF_GAIN: if (!test_bit(FF_GAIN, dev->ffbit) || value > 0xffff) @@ -286,7 +293,6 @@ int input_ff_event(struct input_dev *dev, unsigned int type, break; } - mutex_unlock(&ff->mutex); return 0; } EXPORT_SYMBOL_GPL(input_ff_event); diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index 0f47f4697cdf..9ce3b3baf3a2 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -66,6 +66,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_MOTION: input_report_rel(dev, REL_X, event->motion.rel_x); input_report_rel(dev, REL_Y, event->motion.rel_y); + if (event->motion.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->motion.rel_z); break; case XENKBD_TYPE_KEY: dev = NULL; @@ -84,6 +87,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_POS: input_report_abs(dev, ABS_X, event->pos.abs_x); input_report_abs(dev, ABS_Y, event->pos.abs_y); + if (event->pos.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->pos.rel_z); break; } if (dev) @@ -152,7 +158,7 @@ static int __devinit xenkbd_probe(struct xenbus_device *dev, ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); for (i = BTN_LEFT; i <= BTN_TASK; i++) set_bit(i, ptr->keybit); - ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL); input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); @@ -294,6 +300,16 @@ InitWait: */ if (dev->state != XenbusStateConnected) goto InitWait; /* no InitWait seen yet, fudge it */ + + /* Set input abs params to match backend screen res */ + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "width", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0); + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "height", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0); + break; case XenbusStateClosing: @@ -337,4 +353,6 @@ static void __exit xenkbd_cleanup(void) module_init(xenkbd_init); module_exit(xenkbd_cleanup); +MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:vkbd"); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 005bd045d2eb..5faefeaf6790 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -136,7 +136,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user); * first step in the migration to the kernel types. pte_pfn is already defined * in the kernel. */ #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) -#define pte_flags(x) (pte_val(x) & ~PAGE_MASK) #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) /* interrupts_and_traps.c: */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 835def11419d..ab6a61db63ce 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -432,6 +432,7 @@ static int crypt_convert(struct crypt_config *cc, case 0: atomic_dec(&ctx->pending); ctx->sector++; + cond_resched(); continue; /* error */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 7cf512a34ccf..2580ac1b9b0f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3897,8 +3897,10 @@ static void autorun_devices(int part) md_probe(dev, NULL, NULL); mddev = mddev_find(dev); - if (!mddev) { - printk(KERN_ERR + if (!mddev || !mddev->gendisk) { + if (mddev) + mddev_put(mddev); + printk(KERN_ERR "md: cannot allocate memory for md drive.\n"); break; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1de17da34a95..a71277b640ab 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2137,6 +2137,8 @@ static int run(mddev_t *mddev) !test_bit(In_sync, &disk->rdev->flags)) { disk->head_position = 0; mddev->degraded++; + if (disk->rdev) + conf->fullsync = 1; } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c37e256b1176..54c8ee28fcc4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2898,6 +2898,8 @@ static void handle_stripe5(struct stripe_head *sh) for (i = conf->raid_disks; i--; ) { set_bit(R5_Wantwrite, &sh->dev[i].flags); + set_bit(R5_LOCKED, &dev->flags); + s.locked++; if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) sh->ops.count++; } @@ -2911,6 +2913,7 @@ static void handle_stripe5(struct stripe_head *sh) conf->raid_disks); s.locked += handle_write_operations5(sh, 1, 1); } else if (s.expanded && + s.locked == 0 && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); @@ -4305,7 +4308,9 @@ static int run(mddev_t *mddev) " disk %d\n", bdevname(rdev->bdev,b), raid_disk); working_disks++; - } + } else + /* Cannot rely on bitmap to complete recovery */ + conf->fullsync = 1; } /* diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c index a3485817e46c..8fa91f846d59 100644 --- a/drivers/media/common/ir-keymaps.c +++ b/drivers/media/common/ir-keymaps.c @@ -2201,3 +2201,41 @@ IR_KEYTAB_TYPE ir_codes_powercolor_real_angel[IR_KEYTAB_SIZE] = { [0x25] = KEY_POWER, /* power */ }; EXPORT_SYMBOL_GPL(ir_codes_powercolor_real_angel); + +IR_KEYTAB_TYPE ir_codes_avermedia_a16d[IR_KEYTAB_SIZE] = { + [0x20] = KEY_LIST, + [0x00] = KEY_POWER, + [0x28] = KEY_1, + [0x18] = KEY_2, + [0x38] = KEY_3, + [0x24] = KEY_4, + [0x14] = KEY_5, + [0x34] = KEY_6, + [0x2c] = KEY_7, + [0x1c] = KEY_8, + [0x3c] = KEY_9, + [0x12] = KEY_SUBTITLE, + [0x22] = KEY_0, + [0x32] = KEY_REWIND, + [0x3a] = KEY_SHUFFLE, + [0x02] = KEY_PRINT, + [0x11] = KEY_CHANNELDOWN, + [0x31] = KEY_CHANNELUP, + [0x0c] = KEY_ZOOM, + [0x1e] = KEY_VOLUMEDOWN, + [0x3e] = KEY_VOLUMEUP, + [0x0a] = KEY_MUTE, + [0x04] = KEY_AUDIO, + [0x26] = KEY_RECORD, + [0x06] = KEY_PLAY, + [0x36] = KEY_STOP, + [0x16] = KEY_PAUSE, + [0x2e] = KEY_REWIND, + [0x0e] = KEY_FASTFORWARD, + [0x30] = KEY_TEXT, + [0x21] = KEY_GREEN, + [0x01] = KEY_BLUE, + [0x08] = KEY_EPG, + [0x2a] = KEY_MENU, +}; +EXPORT_SYMBOL_GPL(ir_codes_avermedia_a16d); diff --git a/drivers/media/common/tuners/tda18271-common.c b/drivers/media/common/tuners/tda18271-common.c index f1894fec32b9..6fb5b4586569 100644 --- a/drivers/media/common/tuners/tda18271-common.c +++ b/drivers/media/common/tuners/tda18271-common.c @@ -649,9 +649,17 @@ int tda18271_calc_rf_cal(struct dvb_frontend *fe, u32 *freq) u8 val; int ret = tda18271_lookup_map(fe, RF_CAL, freq, &val); + /* The TDA18271HD/C1 rf_cal map lookup is expected to go out of range + * for frequencies above 61.1 MHz. In these cases, the internal RF + * tracking filters calibration mechanism is used. + * + * There is no need to warn the user about this. + */ + if (ret < 0) + goto fail; regs[R_EB14] = val; - +fail: return ret; } diff --git a/drivers/media/common/tuners/tda18271-fe.c b/drivers/media/common/tuners/tda18271-fe.c index 89c01fb1f859..93063c6fbbf6 100644 --- a/drivers/media/common/tuners/tda18271-fe.c +++ b/drivers/media/common/tuners/tda18271-fe.c @@ -45,6 +45,21 @@ static inline int charge_pump_source(struct dvb_frontend *fe, int force) TDA18271_MAIN_PLL, force); } +static inline void tda18271_set_if_notch(struct dvb_frontend *fe) +{ + struct tda18271_priv *priv = fe->tuner_priv; + unsigned char *regs = priv->tda18271_regs; + + switch (priv->mode) { + case TDA18271_ANALOG: + regs[R_MPD] &= ~0x80; /* IF notch = 0 */ + break; + case TDA18271_DIGITAL: + regs[R_MPD] |= 0x80; /* IF notch = 1 */ + break; + } +} + static int tda18271_channel_configuration(struct dvb_frontend *fe, struct tda18271_std_map_item *map, u32 freq, u32 bw) @@ -60,25 +75,18 @@ static int tda18271_channel_configuration(struct dvb_frontend *fe, regs[R_EP3] &= ~0x1f; /* clear std bits */ regs[R_EP3] |= (map->agc_mode << 3) | map->std; - /* set rfagc to high speed mode */ - regs[R_EP3] &= ~0x04; + if (priv->id == TDA18271HDC2) { + /* set rfagc to high speed mode */ + regs[R_EP3] &= ~0x04; + } /* set cal mode to normal */ regs[R_EP4] &= ~0x03; - /* update IF output level & IF notch frequency */ + /* update IF output level */ regs[R_EP4] &= ~0x1c; /* clear if level bits */ regs[R_EP4] |= (map->if_lvl << 2); - switch (priv->mode) { - case TDA18271_ANALOG: - regs[R_MPD] &= ~0x80; /* IF notch = 0 */ - break; - case TDA18271_DIGITAL: - regs[R_MPD] |= 0x80; /* IF notch = 1 */ - break; - } - /* update FM_RFn */ regs[R_EP4] &= ~0x80; regs[R_EP4] |= map->fm_rfn << 7; @@ -95,6 +103,9 @@ static int tda18271_channel_configuration(struct dvb_frontend *fe, /* disable Power Level Indicator */ regs[R_EP1] |= 0x40; + /* make sure thermometer is off */ + regs[R_TM] &= ~0x10; + /* frequency dependent parameters */ tda18271_calc_ir_measure(fe, &freq); @@ -135,6 +146,7 @@ static int tda18271_channel_configuration(struct dvb_frontend *fe, switch (priv->role) { case TDA18271_MASTER: tda18271_calc_main_pll(fe, N); + tda18271_set_if_notch(fe); tda18271_write_regs(fe, R_MPD, 4); break; case TDA18271_SLAVE: @@ -142,6 +154,7 @@ static int tda18271_channel_configuration(struct dvb_frontend *fe, tda18271_write_regs(fe, R_CPD, 4); regs[R_MPD] = regs[R_CPD] & 0x7f; + tda18271_set_if_notch(fe); tda18271_write_regs(fe, R_MPD, 1); break; } @@ -160,12 +173,14 @@ static int tda18271_channel_configuration(struct dvb_frontend *fe, msleep(20); - /* set rfagc to normal speed mode */ - if (map->fm_rfn) - regs[R_EP3] &= ~0x04; - else - regs[R_EP3] |= 0x04; - ret = tda18271_write_regs(fe, R_EP3, 1); + if (priv->id == TDA18271HDC2) { + /* set rfagc to normal speed mode */ + if (map->fm_rfn) + regs[R_EP3] &= ~0x04; + else + regs[R_EP3] |= 0x04; + ret = tda18271_write_regs(fe, R_EP3, 1); + } fail: return ret; } @@ -507,7 +522,7 @@ static int tda18271_powerscan_init(struct dvb_frontend *fe) /* set cal mode to normal */ regs[R_EP4] &= ~0x03; - /* update IF output level & IF notch frequency */ + /* update IF output level */ regs[R_EP4] &= ~0x1c; /* clear if level bits */ ret = tda18271_write_regs(fe, R_EP3, 2); diff --git a/drivers/media/common/tuners/xc5000.c b/drivers/media/common/tuners/xc5000.c index ceae6db901ec..7cf4f5bdb2ec 100644 --- a/drivers/media/common/tuners/xc5000.c +++ b/drivers/media/common/tuners/xc5000.c @@ -177,6 +177,7 @@ static XC_TV_STANDARD XC5000_Standard[MAX_TV_STANDARD] = { {"FM Radio-INPUT1", 0x0208, 0x9002} }; +static int xc5000_is_firmware_loaded(struct dvb_frontend *fe); static int xc5000_writeregs(struct xc5000_priv *priv, u8 *buf, u8 len); static int xc5000_readregs(struct xc5000_priv *priv, u8 *buf, u8 len); static void xc5000_TunerReset(struct dvb_frontend *fe); @@ -352,7 +353,7 @@ static int xc_SetTVStandard(struct xc5000_priv *priv, static int xc_shutdown(struct xc5000_priv *priv) { - return 0; + return XC_RESULT_SUCCESS; /* Fixme: cannot bring tuner back alive once shutdown * without reloading the driver modules. * return xc_write_reg(priv, XREG_POWER_DOWN, 0); @@ -685,6 +686,25 @@ static int xc5000_set_params(struct dvb_frontend *fe, return 0; } +static int xc5000_is_firmware_loaded(struct dvb_frontend *fe) +{ + struct xc5000_priv *priv = fe->tuner_priv; + int ret; + u16 id; + + ret = xc5000_readreg(priv, XREG_PRODUCT_ID, &id); + if (ret == XC_RESULT_SUCCESS) { + if (id == XC_PRODUCT_ID_FW_NOT_LOADED) + ret = XC_RESULT_RESET_FAILURE; + else + ret = XC_RESULT_SUCCESS; + } + + dprintk(1, "%s() returns %s id = 0x%x\n", __func__, + ret == XC_RESULT_SUCCESS ? "True" : "False", id); + return ret; +} + static int xc_load_fw_and_init_tuner(struct dvb_frontend *fe); static int xc5000_set_analog_params(struct dvb_frontend *fe, @@ -693,7 +713,7 @@ static int xc5000_set_analog_params(struct dvb_frontend *fe, struct xc5000_priv *priv = fe->tuner_priv; int ret; - if(priv->fwloaded == 0) + if (xc5000_is_firmware_loaded(fe) != XC_RESULT_SUCCESS) xc_load_fw_and_init_tuner(fe); dprintk(1, "%s() frequency=%d (in units of 62.5khz)\n", @@ -808,11 +828,10 @@ static int xc_load_fw_and_init_tuner(struct dvb_frontend *fe) struct xc5000_priv *priv = fe->tuner_priv; int ret = 0; - if (priv->fwloaded == 0) { + if (xc5000_is_firmware_loaded(fe) != XC_RESULT_SUCCESS) { ret = xc5000_fwupload(fe); if (ret != XC_RESULT_SUCCESS) return ret; - priv->fwloaded = 1; } /* Start the tuner self-calibration process */ @@ -852,7 +871,6 @@ static int xc5000_sleep(struct dvb_frontend *fe) return -EREMOTEIO; } else { - /* priv->fwloaded = 0; */ return XC_RESULT_SUCCESS; } } @@ -933,7 +951,6 @@ struct dvb_frontend *xc5000_attach(struct dvb_frontend *fe, cfg->i2c_address); printk(KERN_INFO "xc5000: Firmware has been loaded previously\n"); - priv->fwloaded = 1; break; case XC_PRODUCT_ID_FW_NOT_LOADED: printk(KERN_INFO @@ -941,7 +958,6 @@ struct dvb_frontend *xc5000_attach(struct dvb_frontend *fe, cfg->i2c_address); printk(KERN_INFO "xc5000: Firmware has not been loaded previously\n"); - priv->fwloaded = 0; break; default: printk(KERN_ERR diff --git a/drivers/media/common/tuners/xc5000_priv.h b/drivers/media/common/tuners/xc5000_priv.h index ecebfe4745ad..a72a9887fe7f 100644 --- a/drivers/media/common/tuners/xc5000_priv.h +++ b/drivers/media/common/tuners/xc5000_priv.h @@ -30,7 +30,6 @@ struct xc5000_priv { u32 bandwidth; u8 video_standard; u8 rf_mode; - u8 fwloaded; void *devptr; }; diff --git a/drivers/media/dvb/dvb-usb/gl861.c b/drivers/media/dvb/dvb-usb/gl861.c index 0a8ac64a4e33..037f7ffb47b2 100644 --- a/drivers/media/dvb/dvb-usb/gl861.c +++ b/drivers/media/dvb/dvb-usb/gl861.c @@ -47,6 +47,8 @@ static int gl861_i2c_msg(struct dvb_usb_device *d, u8 addr, return -EINVAL; } + msleep(1); /* avoid I2C errors */ + return usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), req, type, value, index, rbuf, rlen, 2000); } @@ -92,16 +94,6 @@ static struct i2c_algorithm gl861_i2c_algo = { }; /* Callbacks for DVB USB */ -static int gl861_identify_state(struct usb_device *udev, - struct dvb_usb_device_properties *props, - struct dvb_usb_device_description **desc, - int *cold) -{ - *cold = 0; - - return 0; -} - static struct zl10353_config gl861_zl10353_config = { .demod_address = 0x0f, .no_tuner = 1, @@ -172,7 +164,6 @@ static struct dvb_usb_device_properties gl861_properties = { .size_of_priv = 0, - .identify_state = gl861_identify_state, .num_adapters = 1, .adapter = {{ @@ -194,13 +185,15 @@ static struct dvb_usb_device_properties gl861_properties = { .num_device_descs = 2, .devices = { - { "MSI Mega Sky 55801 DVB-T USB2.0", - { &gl861_table[0], NULL }, - { NULL }, + { + .name = "MSI Mega Sky 55801 DVB-T USB2.0", + .cold_ids = { NULL }, + .warm_ids = { &gl861_table[0], NULL }, }, - { "A-LINK DTU DVB-T USB2.0", - { &gl861_table[1], NULL }, - { NULL }, + { + .name = "A-LINK DTU DVB-T USB2.0", + .cold_ids = { NULL }, + .warm_ids = { &gl861_table[1], NULL }, }, } }; diff --git a/drivers/media/dvb/dvb-usb/umt-010.c b/drivers/media/dvb/dvb-usb/umt-010.c index 9e7653bb3b66..118aab1a3e54 100644 --- a/drivers/media/dvb/dvb-usb/umt-010.c +++ b/drivers/media/dvb/dvb-usb/umt-010.c @@ -107,7 +107,7 @@ static struct dvb_usb_device_properties umt_properties = { /* parameter for the MPEG2-data transfer */ .stream = { .type = USB_BULK, - .count = 20, + .count = MAX_NO_URBS_FOR_DATA_STREAM, .endpoint = 0x06, .u = { .bulk = { diff --git a/drivers/media/dvb/frontends/au8522.c b/drivers/media/dvb/frontends/au8522.c index 084a280c2d7f..03900d241a76 100644 --- a/drivers/media/dvb/frontends/au8522.c +++ b/drivers/media/dvb/frontends/au8522.c @@ -463,10 +463,13 @@ static int au8522_set_frontend(struct dvb_frontend *fe, struct dvb_frontend_parameters *p) { struct au8522_state *state = fe->demodulator_priv; + int ret = -EINVAL; dprintk("%s(frequency=%d)\n", __func__, p->frequency); - state->current_frequency = p->frequency; + if ((state->current_frequency == p->frequency) && + (state->current_modulation == p->u.vsb.modulation)) + return 0; au8522_enable_modulation(fe, p->u.vsb.modulation); @@ -476,11 +479,16 @@ static int au8522_set_frontend(struct dvb_frontend *fe, if (fe->ops.tuner_ops.set_params) { if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 1); - fe->ops.tuner_ops.set_params(fe, p); + ret = fe->ops.tuner_ops.set_params(fe, p); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); } + if (ret < 0) + return ret; + + state->current_frequency = p->frequency; + return 0; } @@ -498,6 +506,16 @@ static int au8522_init(struct dvb_frontend *fe) return 0; } +static int au8522_sleep(struct dvb_frontend *fe) +{ + struct au8522_state *state = fe->demodulator_priv; + dprintk("%s()\n", __func__); + + state->current_frequency = 0; + + return 0; +} + static int au8522_read_status(struct dvb_frontend *fe, fe_status_t *status) { struct au8522_state *state = fe->demodulator_priv; @@ -509,10 +527,8 @@ static int au8522_read_status(struct dvb_frontend *fe, fe_status_t *status) if (state->current_modulation == VSB_8) { dprintk("%s() Checking VSB_8\n", __func__); reg = au8522_readreg(state, 0x4088); - if (reg & 0x01) - *status |= FE_HAS_VITERBI; - if (reg & 0x02) - *status |= FE_HAS_LOCK | FE_HAS_SYNC; + if ((reg & 0x03) == 0x03) + *status |= FE_HAS_LOCK | FE_HAS_SYNC | FE_HAS_VITERBI; } else { dprintk("%s() Checking QAM\n", __func__); reg = au8522_readreg(state, 0x4541); @@ -672,6 +688,7 @@ static struct dvb_frontend_ops au8522_ops = { }, .init = au8522_init, + .sleep = au8522_sleep, .i2c_gate_ctrl = au8522_i2c_gate_ctrl, .set_frontend = au8522_set_frontend, .get_frontend = au8522_get_frontend, diff --git a/drivers/media/dvb/frontends/stv0299.c b/drivers/media/dvb/frontends/stv0299.c index 17556183e871..35435bef8e79 100644 --- a/drivers/media/dvb/frontends/stv0299.c +++ b/drivers/media/dvb/frontends/stv0299.c @@ -63,6 +63,7 @@ struct stv0299_state { u32 symbol_rate; fe_code_rate_t fec_inner; int errmode; + u32 ucblocks; }; #define STATUS_BER 0 @@ -501,8 +502,10 @@ static int stv0299_read_ber(struct dvb_frontend* fe, u32* ber) { struct stv0299_state* state = fe->demodulator_priv; - if (state->errmode != STATUS_BER) return 0; - *ber = (stv0299_readreg (state, 0x1d) << 8) | stv0299_readreg (state, 0x1e); + if (state->errmode != STATUS_BER) + return -ENOSYS; + + *ber = stv0299_readreg(state, 0x1e) | (stv0299_readreg(state, 0x1d) << 8); return 0; } @@ -540,8 +543,12 @@ static int stv0299_read_ucblocks(struct dvb_frontend* fe, u32* ucblocks) { struct stv0299_state* state = fe->demodulator_priv; - if (state->errmode != STATUS_UCBLOCKS) *ucblocks = 0; - else *ucblocks = (stv0299_readreg (state, 0x1d) << 8) | stv0299_readreg (state, 0x1e); + if (state->errmode != STATUS_UCBLOCKS) + return -ENOSYS; + + state->ucblocks += stv0299_readreg(state, 0x1e); + state->ucblocks += (stv0299_readreg(state, 0x1d) << 8); + *ucblocks = state->ucblocks; return 0; } diff --git a/drivers/media/dvb/frontends/tda10023.c b/drivers/media/dvb/frontends/tda10023.c index 0727b80bc4d2..c6ff5b82ff80 100644 --- a/drivers/media/dvb/frontends/tda10023.c +++ b/drivers/media/dvb/frontends/tda10023.c @@ -116,9 +116,12 @@ static u8 tda10023_readreg (struct tda10023_state* state, u8 reg) int ret; ret = i2c_transfer (state->i2c, msg, 2); - if (ret != 2) - printk("DVB: TDA10023: %s: readreg error (ret == %i)\n", - __func__, ret); + if (ret != 2) { + int num = state->frontend.dvb ? state->frontend.dvb->num : -1; + printk(KERN_ERR "DVB: TDA10023(%d): %s: readreg error " + "(reg == 0x%02x, ret == %i)\n", + num, __func__, reg, ret); + } return b1[0]; } @@ -129,11 +132,12 @@ static int tda10023_writereg (struct tda10023_state* state, u8 reg, u8 data) int ret; ret = i2c_transfer (state->i2c, &msg, 1); - if (ret != 1) - printk("DVB: TDA10023(%d): %s, writereg error " + if (ret != 1) { + int num = state->frontend.dvb ? state->frontend.dvb->num : -1; + printk(KERN_ERR "DVB: TDA10023(%d): %s, writereg error " "(reg == 0x%02x, val == 0x%02x, ret == %i)\n", - state->frontend.dvb->num, __func__, reg, data, ret); - + num, __func__, reg, data, ret); + } return (ret != 1) ? -EREMOTEIO : 0; } @@ -464,7 +468,7 @@ struct dvb_frontend* tda10023_attach(const struct tda1002x_config* config, int i; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct tda10023_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tda10023_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/tda1004x.c b/drivers/media/dvb/frontends/tda1004x.c index 49973846373e..a0d638653567 100644 --- a/drivers/media/dvb/frontends/tda1004x.c +++ b/drivers/media/dvb/frontends/tda1004x.c @@ -1248,11 +1248,14 @@ struct dvb_frontend* tda10045_attach(const struct tda1004x_config* config, struct i2c_adapter* i2c) { struct tda1004x_state *state; + int id; /* allocate memory for the internal state */ state = kmalloc(sizeof(struct tda1004x_state), GFP_KERNEL); - if (!state) + if (!state) { + printk(KERN_ERR "Can't alocate memory for tda10045 state\n"); return NULL; + } /* setup the state */ state->config = config; @@ -1260,7 +1263,15 @@ struct dvb_frontend* tda10045_attach(const struct tda1004x_config* config, state->demod_type = TDA1004X_DEMOD_TDA10045; /* check if the demod is there */ - if (tda1004x_read_byte(state, TDA1004X_CHIPID) != 0x25) { + id = tda1004x_read_byte(state, TDA1004X_CHIPID); + if (id < 0) { + printk(KERN_ERR "tda10045: chip is not answering. Giving up.\n"); + kfree(state); + return NULL; + } + + if (id != 0x25) { + printk(KERN_ERR "Invalid tda1004x ID = 0x%02x. Can't proceed\n", id); kfree(state); return NULL; } @@ -1307,11 +1318,14 @@ struct dvb_frontend* tda10046_attach(const struct tda1004x_config* config, struct i2c_adapter* i2c) { struct tda1004x_state *state; + int id; /* allocate memory for the internal state */ state = kmalloc(sizeof(struct tda1004x_state), GFP_KERNEL); - if (!state) + if (!state) { + printk(KERN_ERR "Can't alocate memory for tda10046 state\n"); return NULL; + } /* setup the state */ state->config = config; @@ -1319,7 +1333,14 @@ struct dvb_frontend* tda10046_attach(const struct tda1004x_config* config, state->demod_type = TDA1004X_DEMOD_TDA10046; /* check if the demod is there */ - if (tda1004x_read_byte(state, TDA1004X_CHIPID) != 0x46) { + id = tda1004x_read_byte(state, TDA1004X_CHIPID); + if (id < 0) { + printk(KERN_ERR "tda10046: chip is not answering. Giving up.\n"); + kfree(state); + return NULL; + } + if (id != 0x46) { + printk(KERN_ERR "Invalid tda1004x ID = 0x%02x. Can't proceed\n", id); kfree(state); return NULL; } diff --git a/drivers/media/dvb/ttpci/Kconfig b/drivers/media/dvb/ttpci/Kconfig index d4339b1b3b68..07643e010093 100644 --- a/drivers/media/dvb/ttpci/Kconfig +++ b/drivers/media/dvb/ttpci/Kconfig @@ -101,6 +101,7 @@ config DVB_BUDGET config DVB_BUDGET_CI tristate "Budget cards with onboard CI connector" depends on DVB_BUDGET_CORE && I2C + depends on INPUT # due to IR select DVB_STV0297 if !DVB_FE_CUSTOMISE select DVB_STV0299 if !DVB_FE_CUSTOMISE select DVB_TDA1004X if !DVB_FE_CUSTOMISE diff --git a/drivers/media/dvb/ttpci/av7110_hw.c b/drivers/media/dvb/ttpci/av7110_hw.c index 9d81074b31df..3a3f5279e927 100644 --- a/drivers/media/dvb/ttpci/av7110_hw.c +++ b/drivers/media/dvb/ttpci/av7110_hw.c @@ -427,6 +427,7 @@ static int __av7110_send_fw_cmd(struct av7110 *av7110, u16* buf, int length) if (err) { printk(KERN_ERR "%s: timeout waiting on busy %s QUEUE\n", __func__, type); + av7110->arm_errors++; return -ETIMEDOUT; } msleep(1); @@ -853,10 +854,8 @@ static osd_raw_window_t bpp2bit[8] = { static inline int WaitUntilBmpLoaded(struct av7110 *av7110) { - int ret = wait_event_interruptible_timeout(av7110->bmpq, + int ret = wait_event_timeout(av7110->bmpq, av7110->bmp_state != BMP_LOADING, 10*HZ); - if (ret == -ERESTARTSYS) - return ret; if (ret == 0) { printk("dvb-ttpci: warning: timeout waiting in LoadBitmap: %d, %d\n", ret, av7110->bmp_state); diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 3b26fbd3e558..5ccb0aeca8cc 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -793,6 +793,14 @@ menuconfig V4L_USB_DRIVERS if V4L_USB_DRIVERS && USB +config USB_VIDEO_CLASS + tristate "USB Video Class (UVC)" + ---help--- + Support for the USB Video Class (UVC). Currently only video + input devices, such as webcams, are supported. + + For more information see: <http://linux-uvc.berlios.de/> + source "drivers/media/video/pvrusb2/Kconfig" source "drivers/media/video/em28xx/Kconfig" diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index dff0d6abe917..ecbbfaab24d5 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -136,6 +136,8 @@ obj-$(CONFIG_SOC_CAMERA_MT9V022) += mt9v022.o obj-$(CONFIG_VIDEO_AU0828) += au0828/ +obj-$(CONFIG_USB_VIDEO_CLASS) += uvc/ + EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core EXTRA_CFLAGS += -Idrivers/media/dvb/frontends EXTRA_CFLAGS += -Idrivers/media/common/tuners diff --git a/drivers/media/video/au0828/au0828-cards.c b/drivers/media/video/au0828/au0828-cards.c index a2a6983444fa..898e12395e7c 100644 --- a/drivers/media/video/au0828/au0828-cards.c +++ b/drivers/media/video/au0828/au0828-cards.c @@ -77,8 +77,14 @@ static void hauppauge_eeprom(struct au0828_dev *dev, u8 *eeprom_data) /* Make sure we support the board model */ switch (tv.model) { + case 72000: /* WinTV-HVR950q (Retail, IR, ATSC/QAM */ case 72001: /* WinTV-HVR950q (Retail, IR, ATSC/QAM and basic analog video */ + case 72211: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and basic analog video */ + case 72221: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and basic analog video */ + case 72231: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and basic analog video */ + case 72241: /* WinTV-HVR950q (OEM, No IR, ATSC/QAM and basic analog video */ case 72301: /* WinTV-HVR850 (Retail, IR, ATSC and basic analog video */ + case 72500: /* WinTV-HVR950q (OEM, No IR, ATSC/QAM */ break; default: printk(KERN_WARNING "%s: warning: " @@ -175,6 +181,18 @@ struct usb_device_id au0828_usb_id_table [] = { .driver_info = AU0828_BOARD_HAUPPAUGE_HVR850 }, { USB_DEVICE(0x0fe9, 0xd620), .driver_info = AU0828_BOARD_DVICO_FUSIONHDTV7 }, + { USB_DEVICE(0x2040, 0x7210), + .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q }, + { USB_DEVICE(0x2040, 0x7217), + .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q }, + { USB_DEVICE(0x2040, 0x721b), + .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q }, + { USB_DEVICE(0x2040, 0x721f), + .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q }, + { USB_DEVICE(0x2040, 0x7280), + .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q }, + { USB_DEVICE(0x0fd9, 0x0008), + .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q }, { }, }; diff --git a/drivers/media/video/cx18/Kconfig b/drivers/media/video/cx18/Kconfig index 5f942690570c..9aefdc5ea79a 100644 --- a/drivers/media/video/cx18/Kconfig +++ b/drivers/media/video/cx18/Kconfig @@ -10,8 +10,8 @@ config VIDEO_CX18 select VIDEO_TVEEPROM select VIDEO_CX2341X select VIDEO_CS5345 - select DVB_S5H1409 - select MEDIA_TUNER_MXL5005S + select DVB_S5H1409 if !DVB_FE_CUSTOMISE + select MEDIA_TUNER_MXL5005S if !DVB_FE_CUSTOMISE ---help--- This is a video4linux driver for Conexant cx23418 based PCI combo video recorder devices. diff --git a/drivers/media/video/cx18/cx18-av-core.c b/drivers/media/video/cx18/cx18-av-core.c index 9a26751615c6..faca43eb940f 100644 --- a/drivers/media/video/cx18/cx18-av-core.c +++ b/drivers/media/video/cx18/cx18-av-core.c @@ -69,6 +69,58 @@ int cx18_av_and_or4(struct cx18 *cx, u16 addr, u32 and_mask, or_value); } +int cx18_av_write_no_acfg(struct cx18 *cx, u16 addr, u8 value, int no_acfg_mask) +{ + int retval; + u32 saved_reg[8] = {0}; + + if (no_acfg_mask & CXADEC_NO_ACFG_AFE) { + saved_reg[0] = cx18_av_read4(cx, CXADEC_CHIP_CTRL); + saved_reg[1] = cx18_av_read4(cx, CXADEC_AFE_CTRL); + } + + if (no_acfg_mask & CXADEC_NO_ACFG_PLL) { + saved_reg[2] = cx18_av_read4(cx, CXADEC_PLL_CTRL1); + saved_reg[3] = cx18_av_read4(cx, CXADEC_VID_PLL_FRAC); + } + + if (no_acfg_mask & CXADEC_NO_ACFG_VID) { + saved_reg[4] = cx18_av_read4(cx, CXADEC_HORIZ_TIM_CTRL); + saved_reg[5] = cx18_av_read4(cx, CXADEC_VERT_TIM_CTRL); + saved_reg[6] = cx18_av_read4(cx, CXADEC_SRC_COMB_CFG); + saved_reg[7] = cx18_av_read4(cx, CXADEC_CHROMA_VBIOFF_CFG); + } + + retval = cx18_av_write(cx, addr, value); + + if (no_acfg_mask & CXADEC_NO_ACFG_AFE) { + cx18_av_write4(cx, CXADEC_CHIP_CTRL, saved_reg[0]); + cx18_av_write4(cx, CXADEC_AFE_CTRL, saved_reg[1]); + } + + if (no_acfg_mask & CXADEC_NO_ACFG_PLL) { + cx18_av_write4(cx, CXADEC_PLL_CTRL1, saved_reg[2]); + cx18_av_write4(cx, CXADEC_VID_PLL_FRAC, saved_reg[3]); + } + + if (no_acfg_mask & CXADEC_NO_ACFG_VID) { + cx18_av_write4(cx, CXADEC_HORIZ_TIM_CTRL, saved_reg[4]); + cx18_av_write4(cx, CXADEC_VERT_TIM_CTRL, saved_reg[5]); + cx18_av_write4(cx, CXADEC_SRC_COMB_CFG, saved_reg[6]); + cx18_av_write4(cx, CXADEC_CHROMA_VBIOFF_CFG, saved_reg[7]); + } + + return retval; +} + +int cx18_av_and_or_no_acfg(struct cx18 *cx, u16 addr, unsigned and_mask, + u8 or_value, int no_acfg_mask) +{ + return cx18_av_write_no_acfg(cx, addr, + (cx18_av_read(cx, addr) & and_mask) | + or_value, no_acfg_mask); +} + /* ----------------------------------------------------------------------- */ static int set_input(struct cx18 *cx, enum cx18_av_video_input vid_input, @@ -170,13 +222,15 @@ static void input_change(struct cx18 *cx) /* Follow step 8c and 8d of section 3.16 in the cx18_av datasheet */ if (std & V4L2_STD_SECAM) - cx18_av_write(cx, 0x402, 0); + cx18_av_write_no_acfg(cx, 0x402, 0, CXADEC_NO_ACFG_ALL); else { - cx18_av_write(cx, 0x402, 0x04); + cx18_av_write_no_acfg(cx, 0x402, 0x04, CXADEC_NO_ACFG_ALL); cx18_av_write(cx, 0x49f, (std & V4L2_STD_NTSC) ? 0x14 : 0x11); } - cx18_av_and_or(cx, 0x401, ~0x60, 0); - cx18_av_and_or(cx, 0x401, ~0x60, 0x60); + cx18_av_and_or_no_acfg(cx, 0x401, ~0x60, 0, + CXADEC_NO_ACFG_PLL | CXADEC_NO_ACFG_VID); + cx18_av_and_or_no_acfg(cx, 0x401, ~0x60, 0x60, + CXADEC_NO_ACFG_PLL | CXADEC_NO_ACFG_VID); if (std & V4L2_STD_525_60) { if (std == V4L2_STD_NTSC_M_JP) { @@ -228,7 +282,7 @@ static int set_input(struct cx18 *cx, enum cx18_av_video_input vid_input, if ((vid_input & ~0xff0) || luma < CX18_AV_SVIDEO_LUMA1 || - luma > CX18_AV_SVIDEO_LUMA4 || + luma > CX18_AV_SVIDEO_LUMA8 || chroma < CX18_AV_SVIDEO_CHROMA4 || chroma > CX18_AV_SVIDEO_CHROMA8) { CX18_ERR("0x%04x is not a valid video input!\n", @@ -262,7 +316,8 @@ static int set_input(struct cx18 *cx, enum cx18_av_video_input vid_input, cx18_av_write(cx, 0x103, reg); /* Set INPUT_MODE to Composite (0) or S-Video (1) */ - cx18_av_and_or(cx, 0x401, ~0x6, is_composite ? 0 : 0x02); + cx18_av_and_or_no_acfg(cx, 0x401, ~0x6, is_composite ? 0 : 0x02, + CXADEC_NO_ACFG_PLL | CXADEC_NO_ACFG_VID); /* Set CH_SEL_ADC2 to 1 if input comes from CH3 */ cx18_av_and_or(cx, 0x102, ~0x2, (reg & 0x80) == 0 ? 2 : 0); /* Set DUAL_MODE_ADC2 to 1 if input comes from both CH2 and CH3 */ @@ -318,12 +373,12 @@ static int set_v4lstd(struct cx18 *cx) This happens for example with the Yuan MPC622. */ if (fmt >= 4 && fmt < 8) { /* Set format to NTSC-M */ - cx18_av_and_or(cx, 0x400, ~0xf, 1); + cx18_av_and_or_no_acfg(cx, 0x400, ~0xf, 1, CXADEC_NO_ACFG_AFE); /* Turn off LCOMB */ cx18_av_and_or(cx, 0x47b, ~6, 0); } - cx18_av_and_or(cx, 0x400, ~0xf, fmt); - cx18_av_and_or(cx, 0x403, ~0x3, pal_m); + cx18_av_and_or_no_acfg(cx, 0x400, ~0xf, fmt, CXADEC_NO_ACFG_AFE); + cx18_av_and_or_no_acfg(cx, 0x403, ~0x3, pal_m, CXADEC_NO_ACFG_ALL); cx18_av_vbi_setup(cx); input_change(cx); return 0; diff --git a/drivers/media/video/cx18/cx18-av-core.h b/drivers/media/video/cx18/cx18-av-core.h index 786901d72e9a..c172823ce1d8 100644 --- a/drivers/media/video/cx18/cx18-av-core.h +++ b/drivers/media/video/cx18/cx18-av-core.h @@ -37,12 +37,16 @@ enum cx18_av_video_input { CX18_AV_COMPOSITE7, CX18_AV_COMPOSITE8, - /* S-Video inputs consist of one luma input (In1-In4) ORed with one + /* S-Video inputs consist of one luma input (In1-In8) ORed with one chroma input (In5-In8) */ CX18_AV_SVIDEO_LUMA1 = 0x10, CX18_AV_SVIDEO_LUMA2 = 0x20, CX18_AV_SVIDEO_LUMA3 = 0x30, CX18_AV_SVIDEO_LUMA4 = 0x40, + CX18_AV_SVIDEO_LUMA5 = 0x50, + CX18_AV_SVIDEO_LUMA6 = 0x60, + CX18_AV_SVIDEO_LUMA7 = 0x70, + CX18_AV_SVIDEO_LUMA8 = 0x80, CX18_AV_SVIDEO_CHROMA4 = 0x400, CX18_AV_SVIDEO_CHROMA5 = 0x500, CX18_AV_SVIDEO_CHROMA6 = 0x600, @@ -291,14 +295,24 @@ struct cx18_av_state { #define CXADEC_SELECT_AUDIO_STANDARD_FM 0xF9 /* FM radio */ #define CXADEC_SELECT_AUDIO_STANDARD_AUTO 0xFF /* Auto detect */ +/* Flags on what to preserve on write to 0x400-0x403 with cx18_av_.*_no_acfg()*/ +#define CXADEC_NO_ACFG_AFE 0x01 /* Preserve 0x100-0x107 */ +#define CXADEC_NO_ACFG_PLL 0x02 /* Preserve 0x108-0x10f */ +#define CXADEC_NO_ACFG_VID 0x04 /* Preserve 0x470-0x47f */ +#define CXADEC_NO_ACFG_ALL 0x07 + /* ----------------------------------------------------------------------- */ /* cx18_av-core.c */ int cx18_av_write(struct cx18 *cx, u16 addr, u8 value); int cx18_av_write4(struct cx18 *cx, u16 addr, u32 value); +int cx18_av_write_no_acfg(struct cx18 *cx, u16 addr, u8 value, + int no_acfg_mask); u8 cx18_av_read(struct cx18 *cx, u16 addr); u32 cx18_av_read4(struct cx18 *cx, u16 addr); int cx18_av_and_or(struct cx18 *cx, u16 addr, unsigned mask, u8 value); int cx18_av_and_or4(struct cx18 *cx, u16 addr, u32 mask, u32 value); +int cx18_av_and_or_no_acfg(struct cx18 *cx, u16 addr, unsigned mask, u8 value, + int no_acfg_mask); int cx18_av_cmd(struct cx18 *cx, unsigned int cmd, void *arg); /* ----------------------------------------------------------------------- */ diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c index baccd079243d..c26e0ef5b075 100644 --- a/drivers/media/video/cx18/cx18-cards.c +++ b/drivers/media/video/cx18/cx18-cards.c @@ -23,6 +23,7 @@ #include "cx18-driver.h" #include "cx18-cards.h" +#include "cx18-av-core.h" #include "cx18-i2c.h" #include <media/cs5345.h> @@ -54,22 +55,22 @@ static const struct cx18_card cx18_card_hvr1600_esmt = { .hw_all = CX18_HW_TVEEPROM | CX18_HW_TUNER | CX18_HW_CS5345 | CX18_HW_DVB, .video_inputs = { - { CX18_CARD_INPUT_VID_TUNER, 0, CX23418_COMPOSITE7 }, - { CX18_CARD_INPUT_SVIDEO1, 1, CX23418_SVIDEO1 }, - { CX18_CARD_INPUT_COMPOSITE1, 1, CX23418_COMPOSITE3 }, - { CX18_CARD_INPUT_SVIDEO2, 2, CX23418_SVIDEO2 }, - { CX18_CARD_INPUT_COMPOSITE2, 2, CX23418_COMPOSITE4 }, + { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE7 }, + { CX18_CARD_INPUT_SVIDEO1, 1, CX18_AV_SVIDEO1 }, + { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE3 }, + { CX18_CARD_INPUT_SVIDEO2, 2, CX18_AV_SVIDEO2 }, + { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE4 }, }, .audio_inputs = { { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 }, + CX18_AV_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 }, { CX18_CARD_INPUT_LINE_IN1, - CX23418_AUDIO_SERIAL, CS5345_IN_2 }, + CX18_AV_AUDIO_SERIAL, CS5345_IN_2 }, { CX18_CARD_INPUT_LINE_IN2, - CX23418_AUDIO_SERIAL, CS5345_IN_2 }, + CX18_AV_AUDIO_SERIAL, CS5345_IN_3 }, }, .radio_input = { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO_SERIAL, 0 }, + CX18_AV_AUDIO_SERIAL, CS5345_IN_4 }, .ddr = { /* ESMT M13S128324A-5B memory */ .chip_config = 0x003, @@ -81,6 +82,11 @@ static const struct cx18_card cx18_card_hvr1600_esmt = { }, .gpio_init.initial_value = 0x3001, .gpio_init.direction = 0x3001, + .gpio_i2c_slave_reset = { + .active_lo_mask = 0x3001, + .msecs_asserted = 10, + .msecs_recovery = 40, + }, .i2c = &cx18_i2c_std, }; @@ -94,22 +100,22 @@ static const struct cx18_card cx18_card_hvr1600_samsung = { .hw_all = CX18_HW_TVEEPROM | CX18_HW_TUNER | CX18_HW_CS5345 | CX18_HW_DVB, .video_inputs = { - { CX18_CARD_INPUT_VID_TUNER, 0, CX23418_COMPOSITE7 }, - { CX18_CARD_INPUT_SVIDEO1, 1, CX23418_SVIDEO1 }, - { CX18_CARD_INPUT_COMPOSITE1, 1, CX23418_COMPOSITE3 }, - { CX18_CARD_INPUT_SVIDEO2, 2, CX23418_SVIDEO2 }, - { CX18_CARD_INPUT_COMPOSITE2, 2, CX23418_COMPOSITE4 }, + { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE7 }, + { CX18_CARD_INPUT_SVIDEO1, 1, CX18_AV_SVIDEO1 }, + { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE3 }, + { CX18_CARD_INPUT_SVIDEO2, 2, CX18_AV_SVIDEO2 }, + { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE4 }, }, .audio_inputs = { { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 }, + CX18_AV_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 }, { CX18_CARD_INPUT_LINE_IN1, - CX23418_AUDIO_SERIAL, CS5345_IN_2 }, + CX18_AV_AUDIO_SERIAL, CS5345_IN_2 }, { CX18_CARD_INPUT_LINE_IN2, - CX23418_AUDIO_SERIAL, CS5345_IN_2 }, + CX18_AV_AUDIO_SERIAL, CS5345_IN_3 }, }, .radio_input = { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO_SERIAL, 0 }, + CX18_AV_AUDIO_SERIAL, CS5345_IN_4 }, .ddr = { /* Samsung K4D263238G-VC33 memory */ .chip_config = 0x003, @@ -121,6 +127,11 @@ static const struct cx18_card cx18_card_hvr1600_samsung = { }, .gpio_init.initial_value = 0x3001, .gpio_init.direction = 0x3001, + .gpio_i2c_slave_reset = { + .active_lo_mask = 0x3001, + .msecs_asserted = 10, + .msecs_recovery = 40, + }, .i2c = &cx18_i2c_std, }; @@ -141,19 +152,19 @@ static const struct cx18_card cx18_card_h900 = { .hw_audio_ctrl = CX18_HW_CX23418, .hw_all = CX18_HW_TUNER, .video_inputs = { - { CX18_CARD_INPUT_VID_TUNER, 0, CX23418_COMPOSITE2 }, + { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE2 }, { CX18_CARD_INPUT_SVIDEO1, 1, - CX23418_SVIDEO_LUMA3 | CX23418_SVIDEO_CHROMA4 }, - { CX18_CARD_INPUT_COMPOSITE1, 1, CX23418_COMPOSITE1 }, + CX18_AV_SVIDEO_LUMA3 | CX18_AV_SVIDEO_CHROMA4 }, + { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE1 }, }, .audio_inputs = { { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO8, 0 }, + CX18_AV_AUDIO8, 0 }, { CX18_CARD_INPUT_LINE_IN1, - CX23418_AUDIO_SERIAL, 0 }, + CX18_AV_AUDIO_SERIAL, 0 }, }, .radio_input = { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO_SERIAL, 0 }, + CX18_AV_AUDIO_SERIAL, 0 }, .tuners = { { .std = V4L2_STD_ALL, .tuner = TUNER_XC2028 }, }, @@ -183,23 +194,26 @@ static const struct cx18_card_pci_info cx18_pci_mpc718[] = { static const struct cx18_card cx18_card_mpc718 = { .type = CX18_CARD_YUAN_MPC718, .name = "Yuan MPC718", - .comment = "Not yet supported!\n", - .v4l2_capabilities = 0, + .comment = "Some Composite and S-Video inputs are currently working.\n", + .v4l2_capabilities = CX18_CAP_ENCODER, .hw_audio_ctrl = CX18_HW_CX23418, .hw_all = CX18_HW_TUNER, .video_inputs = { - { CX18_CARD_INPUT_VID_TUNER, 0, CX23418_COMPOSITE7 }, - { CX18_CARD_INPUT_SVIDEO1, 1, CX23418_SVIDEO1 }, - { CX18_CARD_INPUT_COMPOSITE1, 1, CX23418_COMPOSITE3 }, + { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE2 }, + { CX18_CARD_INPUT_SVIDEO1, 1, + CX18_AV_SVIDEO_LUMA3 | CX18_AV_SVIDEO_CHROMA4 }, + { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE1 }, + { CX18_CARD_INPUT_SVIDEO2, 2, + CX18_AV_SVIDEO_LUMA7 | CX18_AV_SVIDEO_CHROMA8 }, + { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE6 }, + { CX18_CARD_INPUT_COMPOSITE3, 2, CX18_AV_COMPOSITE3 }, }, .audio_inputs = { - { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO8, 0 }, - { CX18_CARD_INPUT_LINE_IN1, - CX23418_AUDIO_SERIAL, 0 }, + { CX18_CARD_INPUT_AUD_TUNER, CX18_AV_AUDIO5, 0 }, + { CX18_CARD_INPUT_LINE_IN1, CX18_AV_AUDIO_SERIAL, 0 }, + { CX18_CARD_INPUT_LINE_IN2, CX18_AV_AUDIO_SERIAL, 0 }, }, - .radio_input = { CX18_CARD_INPUT_AUD_TUNER, - CX23418_AUDIO_SERIAL, 0 }, + .radio_input = { CX18_CARD_INPUT_AUD_TUNER, CX18_AV_AUDIO_SERIAL, 0 }, .tuners = { /* XC3028 tuner */ { .std = V4L2_STD_ALL, .tuner = TUNER_XC2028 }, diff --git a/drivers/media/video/cx18/cx18-cards.h b/drivers/media/video/cx18/cx18-cards.h index bccb67f0db16..dc2dd945d4c3 100644 --- a/drivers/media/video/cx18/cx18-cards.h +++ b/drivers/media/video/cx18/cx18-cards.h @@ -36,36 +36,6 @@ #define CX18_CARD_INPUT_COMPOSITE2 5 #define CX18_CARD_INPUT_COMPOSITE3 6 -enum cx34180_video_input { - /* Composite video inputs In1-In8 */ - CX23418_COMPOSITE1 = 1, - CX23418_COMPOSITE2, - CX23418_COMPOSITE3, - CX23418_COMPOSITE4, - CX23418_COMPOSITE5, - CX23418_COMPOSITE6, - CX23418_COMPOSITE7, - CX23418_COMPOSITE8, - - /* S-Video inputs consist of one luma input (In1-In4) ORed with one - chroma input (In5-In8) */ - CX23418_SVIDEO_LUMA1 = 0x10, - CX23418_SVIDEO_LUMA2 = 0x20, - CX23418_SVIDEO_LUMA3 = 0x30, - CX23418_SVIDEO_LUMA4 = 0x40, - CX23418_SVIDEO_CHROMA4 = 0x400, - CX23418_SVIDEO_CHROMA5 = 0x500, - CX23418_SVIDEO_CHROMA6 = 0x600, - CX23418_SVIDEO_CHROMA7 = 0x700, - CX23418_SVIDEO_CHROMA8 = 0x800, - - /* S-Video aliases for common luma/chroma combinations */ - CX23418_SVIDEO1 = 0x510, - CX23418_SVIDEO2 = 0x620, - CX23418_SVIDEO3 = 0x730, - CX23418_SVIDEO4 = 0x840, -}; - /* audio inputs */ #define CX18_CARD_INPUT_AUD_TUNER 1 #define CX18_CARD_INPUT_LINE_IN1 2 @@ -75,16 +45,6 @@ enum cx34180_video_input { #define CX18_CARD_MAX_AUDIO_INPUTS 3 #define CX18_CARD_MAX_TUNERS 2 -enum cx23418_audio_input { - /* Audio inputs: serial or In4-In8 */ - CX23418_AUDIO_SERIAL, - CX23418_AUDIO4 = 4, - CX23418_AUDIO5, - CX23418_AUDIO6, - CX23418_AUDIO7, - CX23418_AUDIO8, -}; - /* V4L2 capability aliases */ #define CX18_CAP_ENCODER (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_TUNER | \ V4L2_CAP_AUDIO | V4L2_CAP_READWRITE) @@ -118,6 +78,13 @@ struct cx18_gpio_init { /* set initial GPIO DIR and OUT values */ u32 initial_value; }; +struct cx18_gpio_i2c_slave_reset { + u32 active_lo_mask; /* GPIO outputs that reset i2c chips when low */ + u32 active_hi_mask; /* GPIO outputs that reset i2c chips when high */ + int msecs_asserted; /* time period reset must remain asserted */ + int msecs_recovery; /* time after deassert for chips to be ready */ +}; + struct cx18_card_tuner { v4l2_std_id std; /* standard for which the tuner is suitable */ int tuner; /* tuner ID (from tuner.h) */ @@ -154,7 +121,8 @@ struct cx18_card { /* GPIO card-specific settings */ u8 xceive_pin; /* XCeive tuner GPIO reset pin */ - struct cx18_gpio_init gpio_init; + struct cx18_gpio_init gpio_init; + struct cx18_gpio_i2c_slave_reset gpio_i2c_slave_reset; struct cx18_card_tuner tuners[CX18_CARD_MAX_TUNERS]; struct cx18_card_tuner_i2c *i2c; diff --git a/drivers/media/video/cx18/cx18-dvb.c b/drivers/media/video/cx18/cx18-dvb.c index c9744173f969..cae38985b131 100644 --- a/drivers/media/video/cx18/cx18-dvb.c +++ b/drivers/media/video/cx18/cx18-dvb.c @@ -69,11 +69,21 @@ static int cx18_dvb_start_feed(struct dvb_demux_feed *feed) struct dvb_demux *demux = feed->demux; struct cx18_stream *stream = (struct cx18_stream *) demux->priv; struct cx18 *cx = stream->cx; - int ret = -EINVAL; + int ret; u32 v; CX18_DEBUG_INFO("Start feed: pid = 0x%x index = %d\n", feed->pid, feed->index); + + mutex_lock(&cx->serialize_lock); + ret = cx18_init_on_first_open(cx); + mutex_unlock(&cx->serialize_lock); + if (ret) { + CX18_ERR("Failed to initialize firmware starting DVB feed\n"); + return ret; + } + ret = -EINVAL; + switch (cx->card->type) { case CX18_CARD_HVR_1600_ESMT: case CX18_CARD_HVR_1600_SAMSUNG: @@ -101,6 +111,11 @@ static int cx18_dvb_start_feed(struct dvb_demux_feed *feed) if (stream->dvb.feeding++ == 0) { CX18_DEBUG_INFO("Starting Transport DMA\n"); ret = cx18_start_v4l2_encode_stream(stream); + if (ret < 0) { + CX18_DEBUG_INFO( + "Failed to start Transport DMA\n"); + stream->dvb.feeding--; + } } else ret = 0; mutex_unlock(&stream->dvb.feedlock); diff --git a/drivers/media/video/cx18/cx18-gpio.c b/drivers/media/video/cx18/cx18-gpio.c index ceb63653c926..b302833f6f9d 100644 --- a/drivers/media/video/cx18/cx18-gpio.c +++ b/drivers/media/video/cx18/cx18-gpio.c @@ -53,10 +53,34 @@ static void gpio_write(struct cx18 *cx) write_reg(((dir & 0xffff) << 16) | (val & 0xffff), CX18_REG_GPIO_OUT1); write_reg(dir & 0xffff0000, CX18_REG_GPIO_DIR2); - write_reg((dir & 0xffff0000) | ((val & 0xffff0000) >> 16), + write_reg_sync((dir & 0xffff0000) | ((val & 0xffff0000) >> 16), CX18_REG_GPIO_OUT2); } +void cx18_reset_i2c_slaves_gpio(struct cx18 *cx) +{ + const struct cx18_gpio_i2c_slave_reset *p; + + p = &cx->card->gpio_i2c_slave_reset; + + if ((p->active_lo_mask | p->active_hi_mask) == 0) + return; + + /* Assuming that the masks are a subset of the bits in gpio_dir */ + + /* Assert */ + cx->gpio_val = + (cx->gpio_val | p->active_hi_mask) & ~(p->active_lo_mask); + gpio_write(cx); + schedule_timeout_uninterruptible(msecs_to_jiffies(p->msecs_asserted)); + + /* Deassert */ + cx->gpio_val = + (cx->gpio_val | p->active_lo_mask) & ~(p->active_hi_mask); + gpio_write(cx); + schedule_timeout_uninterruptible(msecs_to_jiffies(p->msecs_recovery)); +} + void cx18_gpio_init(struct cx18 *cx) { cx->gpio_dir = cx->card->gpio_init.direction; diff --git a/drivers/media/video/cx18/cx18-gpio.h b/drivers/media/video/cx18/cx18-gpio.h index 41bac8856b50..525c328f748a 100644 --- a/drivers/media/video/cx18/cx18-gpio.h +++ b/drivers/media/video/cx18/cx18-gpio.h @@ -21,4 +21,5 @@ */ void cx18_gpio_init(struct cx18 *cx); +void cx18_reset_i2c_slaves_gpio(struct cx18 *cx); int cx18_reset_tuner_gpio(void *dev, int cmd, int value); diff --git a/drivers/media/video/cx18/cx18-i2c.c b/drivers/media/video/cx18/cx18-i2c.c index 1d6c51a75313..680bc4e35b79 100644 --- a/drivers/media/video/cx18/cx18-i2c.c +++ b/drivers/media/video/cx18/cx18-i2c.c @@ -405,6 +405,8 @@ int init_cx18_i2c(struct cx18 *cx) cx18_setscl(&cx->i2c_algo_cb_data[1], 1); cx18_setsda(&cx->i2c_algo_cb_data[1], 1); + cx18_reset_i2c_slaves_gpio(cx); + return i2c_bit_add_bus(&cx->i2c_adap[0]) || i2c_bit_add_bus(&cx->i2c_adap[1]); } diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c index 607efdcd22f8..1da6f134888d 100644 --- a/drivers/media/video/cx25840/cx25840-core.c +++ b/drivers/media/video/cx25840/cx25840-core.c @@ -433,7 +433,7 @@ static int set_input(struct i2c_client *client, enum cx25840_video_input vid_inp int chroma = vid_input & 0xf00; if ((vid_input & ~0xff0) || - luma < CX25840_SVIDEO_LUMA1 || luma > CX25840_SVIDEO_LUMA4 || + luma < CX25840_SVIDEO_LUMA1 || luma > CX25840_SVIDEO_LUMA8 || chroma < CX25840_SVIDEO_CHROMA4 || chroma > CX25840_SVIDEO_CHROMA8) { v4l_err(client, "0x%04x is not a valid video input!\n", vid_input); diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c index e976fc6bef7c..80c8883e54b5 100644 --- a/drivers/media/video/cx88/cx88-alsa.c +++ b/drivers/media/video/cx88/cx88-alsa.c @@ -332,6 +332,12 @@ static int snd_cx88_pcm_open(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; int err; + if (!chip) { + printk(KERN_ERR "BUG: cx88 can't find device struct." + " Can't proceed with open\n"); + return -ENODEV; + } + err = snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_PERIODS); if (err < 0) goto _error; diff --git a/drivers/media/video/em28xx/em28xx-audio.c b/drivers/media/video/em28xx/em28xx-audio.c index 92b2a6db4fdc..3c006103c1eb 100644 --- a/drivers/media/video/em28xx/em28xx-audio.c +++ b/drivers/media/video/em28xx/em28xx-audio.c @@ -268,6 +268,12 @@ static int snd_em28xx_capture_open(struct snd_pcm_substream *substream) dprintk("opening device and trying to acquire exclusive lock\n"); + if (!dev) { + printk(KERN_ERR "BUG: em28xx can't find device struct." + " Can't proceed with open\n"); + return -ENODEV; + } + /* Sets volume, mute, etc */ dev->mute = 0; @@ -415,6 +421,12 @@ static int em28xx_audio_init(struct em28xx *dev) static int devnr; int ret, err; + if (dev->has_audio_class) { + /* This device does not support the extension (in this case + the device is expecting the snd-usb-audio module */ + return 0; + } + printk(KERN_INFO "em28xx-audio.c: probing for em28x1 " "non standard usbaudio\n"); printk(KERN_INFO "em28xx-audio.c: Copyright (C) 2006 Markus " @@ -458,6 +470,12 @@ static int em28xx_audio_fini(struct em28xx *dev) if (dev == NULL) return 0; + if (dev->has_audio_class) { + /* This device does not support the extension (in this case + the device is expecting the snd-usb-audio module */ + return 0; + } + if (dev->adev) { snd_card_free(dev->adev->sndcard); kfree(dev->adev); diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 3e4f3c7e92e7..8cbda43727c3 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -157,6 +157,7 @@ struct em28xx_board em28xx_boards[] = { .tda9887_conf = TDA9887_PRESENT, .tuner_type = TUNER_XC2028, .mts_firmware = 1, + .has_dvb = 1, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, @@ -524,6 +525,9 @@ void em28xx_pre_card_setup(struct em28xx *dev) rc = em28xx_read_reg(dev, EM28XX_R0A_CHIPID); if (rc > 0) { switch (rc) { + case CHIP_ID_EM2860: + em28xx_info("chip ID is em2860\n"); + break; case CHIP_ID_EM2883: em28xx_info("chip ID is em2882/em2883\n"); dev->wait_after_write = 0; diff --git a/drivers/media/video/em28xx/em28xx-dvb.c b/drivers/media/video/em28xx/em28xx-dvb.c index 8cf4983f0039..0b2333ee07f8 100644 --- a/drivers/media/video/em28xx/em28xx-dvb.c +++ b/drivers/media/video/em28xx/em28xx-dvb.c @@ -382,6 +382,11 @@ static int dvb_init(struct em28xx *dev) int result = 0; struct em28xx_dvb *dvb; + if (!dev->has_dvb) { + /* This device does not support the extension */ + return 0; + } + dvb = kzalloc(sizeof(struct em28xx_dvb), GFP_KERNEL); if (dvb == NULL) { @@ -444,6 +449,11 @@ out_free: static int dvb_fini(struct em28xx *dev) { + if (!dev->has_dvb) { + /* This device does not support the extension */ + return 0; + } + if (dev->dvb) { unregister_dvb(dev->dvb); dev->dvb = NULL; diff --git a/drivers/media/video/em28xx/em28xx-reg.h b/drivers/media/video/em28xx/em28xx-reg.h index 9058bed07953..fac1ab23f621 100644 --- a/drivers/media/video/em28xx/em28xx-reg.h +++ b/drivers/media/video/em28xx/em28xx-reg.h @@ -84,5 +84,6 @@ /* FIXME: Need to be populated with the other chip ID's */ enum em28xx_chip_id { + CHIP_ID_EM2860 = 34, CHIP_ID_EM2883 = 36, }; diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index fb163ecd9216..285bc62bbe46 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -1848,32 +1848,28 @@ static DEFINE_MUTEX(em28xx_extension_devlist_lock); int em28xx_register_extension(struct em28xx_ops *ops) { - struct em28xx *h, *dev = NULL; - - list_for_each_entry(h, &em28xx_devlist, devlist) - dev = h; + struct em28xx *dev = NULL; mutex_lock(&em28xx_extension_devlist_lock); list_add_tail(&ops->next, &em28xx_extension_devlist); - if (dev) - ops->init(dev); - + list_for_each_entry(dev, &em28xx_devlist, devlist) { + if (dev) + ops->init(dev); + } printk(KERN_INFO "Em28xx: Initialized (%s) extension\n", ops->name); mutex_unlock(&em28xx_extension_devlist_lock); - return 0; } EXPORT_SYMBOL(em28xx_register_extension); void em28xx_unregister_extension(struct em28xx_ops *ops) { - struct em28xx *h, *dev = NULL; - - list_for_each_entry(h, &em28xx_devlist, devlist) - dev = h; + struct em28xx *dev = NULL; - if (dev) - ops->fini(dev); + list_for_each_entry(dev, &em28xx_devlist, devlist) { + if (dev) + ops->fini(dev); + } mutex_lock(&em28xx_extension_devlist_lock); printk(KERN_INFO "Em28xx: Removed (%s) extension\n", ops->name); diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c index 7cc8e9b19fb7..5ec5bb9a94d2 100644 --- a/drivers/media/video/pxa_camera.c +++ b/drivers/media/video/pxa_camera.c @@ -1019,12 +1019,12 @@ static int pxa_camera_probe(struct platform_device *pdev) struct pxa_camera_dev *pcdev; struct resource *res; void __iomem *base; - unsigned int irq; + int irq; int err = 0; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); - if (!res || !irq) { + if (!res || irq < 0) { err = -ENODEV; goto exit; } diff --git a/drivers/media/video/saa7134/saa7134-alsa.c b/drivers/media/video/saa7134/saa7134-alsa.c index ba3082422a01..f118de6e3672 100644 --- a/drivers/media/video/saa7134/saa7134-alsa.c +++ b/drivers/media/video/saa7134/saa7134-alsa.c @@ -613,9 +613,15 @@ static int snd_card_saa7134_capture_open(struct snd_pcm_substream * substream) struct snd_pcm_runtime *runtime = substream->runtime; snd_card_saa7134_pcm_t *pcm; snd_card_saa7134_t *saa7134 = snd_pcm_substream_chip(substream); - struct saa7134_dev *dev = saa7134->dev; + struct saa7134_dev *dev; int amux, err; + if (!saa7134) { + printk(KERN_ERR "BUG: saa7134 can't find device struct." + " Can't proceed with open\n"); + return -ENODEV; + } + dev = saa7134->dev; mutex_lock(&dev->dmasound.lock); dev->dmasound.read_count = 0; diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c index b111903aa322..2618cfa592e7 100644 --- a/drivers/media/video/saa7134/saa7134-cards.c +++ b/drivers/media/video/saa7134/saa7134-cards.c @@ -4114,11 +4114,7 @@ struct saa7134_board saa7134_boards[] = { .radio_type = UNSET, .tuner_addr = ADDR_UNSET, .radio_addr = ADDR_UNSET, - /* - TODO: .mpeg = SAA7134_MPEG_DVB, - */ - .inputs = {{ .name = name_tv, .vmux = 1, @@ -4157,7 +4153,7 @@ struct saa7134_board saa7134_boards[] = { } }, .radio = { .name = name_radio, - .amux = LINE1, + .amux = TV, }, }, [SAA7134_BOARD_AVERMEDIA_M115] = { @@ -4167,6 +4163,7 @@ struct saa7134_board saa7134_boards[] = { .radio_type = UNSET, .tuner_addr = ADDR_UNSET, .radio_addr = ADDR_UNSET, + .mpeg = SAA7134_MPEG_DVB, .inputs = {{ .name = name_tv, .vmux = 1, @@ -5351,22 +5348,21 @@ static int saa7134_xc2028_callback(struct saa7134_dev *dev, { switch (command) { case XC2028_TUNER_RESET: - saa_andorl(SAA7134_GPIO_GPMODE0 >> 2, 0x06e20000, 0x06e20000); - saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0x06a20000, 0x06a20000); - mdelay(250); - saa_andorl(SAA7134_GPIO_GPMODE0 >> 2, 0x06e20000, 0); - saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0x06a20000, 0); - mdelay(250); - saa_andorl(SAA7134_GPIO_GPMODE0 >> 2, 0x06e20000, 0x06e20000); - saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0x06a20000, 0x06a20000); - mdelay(250); - saa_andorl(SAA7133_ANALOG_IO_SELECT >> 2, 0x02, 0x02); - saa_andorl(SAA7134_ANALOG_IN_CTRL1 >> 2, 0x81, 0x81); - saa_andorl(SAA7134_AUDIO_CLOCK0 >> 2, 0x03187de7, 0x03187de7); - saa_andorl(SAA7134_AUDIO_PLL_CTRL >> 2, 0x03, 0x03); - saa_andorl(SAA7134_AUDIO_CLOCKS_PER_FIELD0 >> 2, - 0x0001e000, 0x0001e000); - return 0; + saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0x00008000, 0x00000000); + saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0x00008000, 0x00008000); + switch (dev->board) { + case SAA7134_BOARD_AVERMEDIA_CARDBUS_506: + saa7134_set_gpio(dev, 23, 0); + msleep(10); + saa7134_set_gpio(dev, 23, 1); + break; + case SAA7134_BOARD_AVERMEDIA_A16D: + saa7134_set_gpio(dev, 21, 0); + msleep(10); + saa7134_set_gpio(dev, 21, 1); + break; + } + return 0; } return -EINVAL; } @@ -5553,9 +5549,7 @@ int saa7134_board_init1(struct saa7134_dev *dev) saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0x08000000, 0x00000000); break; case SAA7134_BOARD_AVERMEDIA_CARDBUS: - case SAA7134_BOARD_AVERMEDIA_CARDBUS_506: case SAA7134_BOARD_AVERMEDIA_M115: - case SAA7134_BOARD_AVERMEDIA_A16D: /* power-down tuner chip */ saa_andorl(SAA7134_GPIO_GPMODE0 >> 2, 0xffffffff, 0); saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0xffffffff, 0); @@ -5565,6 +5559,18 @@ int saa7134_board_init1(struct saa7134_dev *dev) saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0xffffffff, 0xffffffff); msleep(10); break; + case SAA7134_BOARD_AVERMEDIA_CARDBUS_506: + saa7134_set_gpio(dev, 23, 0); + msleep(10); + saa7134_set_gpio(dev, 23, 1); + break; + case SAA7134_BOARD_AVERMEDIA_A16D: + saa7134_set_gpio(dev, 21, 0); + msleep(10); + saa7134_set_gpio(dev, 21, 1); + msleep(1); + dev->has_remote = SAA7134_REMOTE_GPIO; + break; case SAA7134_BOARD_BEHOLD_COLUMBUS_TVFM: /* power-down tuner chip */ saa_andorl(SAA7134_GPIO_GPMODE0 >> 2, 0x000A8004, 0x000A8004); @@ -5615,7 +5621,8 @@ int saa7134_board_init1(struct saa7134_dev *dev) saa_andorl(SAA7134_GPIO_GPMODE0 >> 2, 0x80040100, 0x80040100); saa_andorl(SAA7134_GPIO_GPSTATUS0 >> 2, 0x80040100, 0x00040100); printk("%s: %s: hybrid analog/dvb card\n" - "%s: Sorry, only the analog inputs are supported for now.\n", + "%s: Sorry, only analog s-video and composite input " + "are supported for now.\n", dev->name, card(dev).name, dev->name); break; } @@ -5675,6 +5682,7 @@ static void saa7134_tuner_setup(struct saa7134_dev *dev) switch (dev->board) { case SAA7134_BOARD_AVERMEDIA_A16D: + case SAA7134_BOARD_AVERMEDIA_CARDBUS_506: ctl.demod = XC3028_FE_ZARLINK456; break; default: diff --git a/drivers/media/video/saa7134/saa7134-dvb.c b/drivers/media/video/saa7134/saa7134-dvb.c index 469f93aac008..341b101b0357 100644 --- a/drivers/media/video/saa7134/saa7134-dvb.c +++ b/drivers/media/video/saa7134/saa7134-dvb.c @@ -153,12 +153,12 @@ static int mt352_aver777_init(struct dvb_frontend* fe) return 0; } -static int mt352_aver_a16d_init(struct dvb_frontend *fe) +static int mt352_avermedia_xc3028_init(struct dvb_frontend *fe) { - static u8 clock_config [] = { CLOCK_CTL, 0x38, 0x2d }; - static u8 reset [] = { RESET, 0x80 }; - static u8 adc_ctl_1_cfg [] = { ADC_CTL_1, 0x40 }; - static u8 agc_cfg [] = { AGC_TARGET, 0x28, 0xa0 }; + static u8 clock_config [] = { CLOCK_CTL, 0x38, 0x2d }; + static u8 reset [] = { RESET, 0x80 }; + static u8 adc_ctl_1_cfg [] = { ADC_CTL_1, 0x40 }; + static u8 agc_cfg [] = { AGC_TARGET, 0xe }; static u8 capt_range_cfg[] = { CAPT_RANGE, 0x33 }; mt352_write(fe, clock_config, sizeof(clock_config)); @@ -167,12 +167,9 @@ static int mt352_aver_a16d_init(struct dvb_frontend *fe) mt352_write(fe, adc_ctl_1_cfg, sizeof(adc_ctl_1_cfg)); mt352_write(fe, agc_cfg, sizeof(agc_cfg)); mt352_write(fe, capt_range_cfg, sizeof(capt_range_cfg)); - return 0; } - - static int mt352_pinnacle_tuner_set_params(struct dvb_frontend* fe, struct dvb_frontend_parameters* params) { @@ -215,14 +212,10 @@ static struct mt352_config avermedia_777 = { .demod_init = mt352_aver777_init, }; -static struct mt352_config avermedia_16d = { - .demod_address = 0xf, - .demod_init = mt352_aver_a16d_init, -}; - -static struct mt352_config avermedia_e506r_mt352_dev = { +static struct mt352_config avermedia_xc3028_mt352_dev = { .demod_address = (0x1e >> 1), .no_tuner = 1, + .demod_init = mt352_avermedia_xc3028_init, }; /* ================================================================== @@ -975,9 +968,10 @@ static int dvb_init(struct saa7134_dev *dev) } break; case SAA7134_BOARD_AVERMEDIA_A16D: - dprintk("avertv A16D dvb setup\n"); - dev->dvb.frontend = dvb_attach(mt352_attach, &avermedia_16d, - &dev->i2c_adap); + dprintk("AverMedia A16D dvb setup\n"); + dev->dvb.frontend = dvb_attach(mt352_attach, + &avermedia_xc3028_mt352_dev, + &dev->i2c_adap); attach_xc3028 = 1; break; case SAA7134_BOARD_MD7134: @@ -1091,7 +1085,8 @@ static int dvb_init(struct saa7134_dev *dev) ads_tech_duo_config.tuner_address); goto dettach_frontend; } - } + } else + wprintk("failed to attach tda10046\n"); break; case SAA7134_BOARD_TEVION_DVBT_220RF: if (configure_tda827x_fe(dev, &tevion_dvbt220rf_config, @@ -1260,11 +1255,14 @@ static int dvb_init(struct saa7134_dev *dev) goto dettach_frontend; break; case SAA7134_BOARD_AVERMEDIA_CARDBUS_506: + dprintk("AverMedia E506R dvb setup\n"); + saa7134_set_gpio(dev, 25, 0); + msleep(10); + saa7134_set_gpio(dev, 25, 1); dev->dvb.frontend = dvb_attach(mt352_attach, - &avermedia_e506r_mt352_dev, - &dev->i2c_adap); + &avermedia_xc3028_mt352_dev, + &dev->i2c_adap); attach_xc3028 = 1; - break; case SAA7134_BOARD_MD7134_BRIDGE_2: dev->dvb.frontend = dvb_attach(tda10086_attach, &sd1878_4m, &dev->i2c_adap); @@ -1338,7 +1336,8 @@ static int dvb_init(struct saa7134_dev *dev) return ret; dettach_frontend: - dvb_frontend_detach(dev->dvb.frontend); + if (dev->dvb.frontend) + dvb_frontend_detach(dev->dvb.frontend); dev->dvb.frontend = NULL; return -1; diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c index 81431ee41842..3ae71a340822 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -110,9 +110,10 @@ static int ts_release(struct inode *inode, struct file *file) { struct saa7134_dev *dev = file->private_data; + mutex_lock(&dev->empress_tsq.vb_lock); + videobuf_stop(&dev->empress_tsq); videobuf_mmap_free(&dev->empress_tsq); - dev->empress_users--; /* stop the encoder */ ts_reset_encoder(dev); @@ -121,6 +122,10 @@ static int ts_release(struct inode *inode, struct file *file) saa_writeb(SAA7134_AUDIO_MUTE_CTRL, saa_readb(SAA7134_AUDIO_MUTE_CTRL) | (1 << 6)); + dev->empress_users--; + + mutex_unlock(&dev->empress_tsq.vb_lock); + return 0; } @@ -218,8 +223,7 @@ static int empress_enum_fmt_cap(struct file *file, void *priv, static int empress_g_fmt_cap(struct file *file, void *priv, struct v4l2_format *f) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; saa7134_i2c_call_clients(dev, VIDIOC_G_FMT, f); @@ -232,8 +236,7 @@ static int empress_g_fmt_cap(struct file *file, void *priv, static int empress_s_fmt_cap(struct file *file, void *priv, struct v4l2_format *f) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; saa7134_i2c_call_clients(dev, VIDIOC_S_FMT, f); @@ -247,8 +250,7 @@ static int empress_s_fmt_cap(struct file *file, void *priv, static int empress_reqbufs(struct file *file, void *priv, struct v4l2_requestbuffers *p) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; return videobuf_reqbufs(&dev->empress_tsq, p); } @@ -256,24 +258,21 @@ static int empress_reqbufs(struct file *file, void *priv, static int empress_querybuf(struct file *file, void *priv, struct v4l2_buffer *b) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; return videobuf_querybuf(&dev->empress_tsq, b); } static int empress_qbuf(struct file *file, void *priv, struct v4l2_buffer *b) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; return videobuf_qbuf(&dev->empress_tsq, b); } static int empress_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; return videobuf_dqbuf(&dev->empress_tsq, b, file->f_flags & O_NONBLOCK); @@ -282,8 +281,7 @@ static int empress_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b) static int empress_streamon(struct file *file, void *priv, enum v4l2_buf_type type) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; return videobuf_streamon(&dev->empress_tsq); } @@ -291,8 +289,7 @@ static int empress_streamon(struct file *file, void *priv, static int empress_streamoff(struct file *file, void *priv, enum v4l2_buf_type type) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; return videobuf_streamoff(&dev->empress_tsq); } @@ -300,8 +297,7 @@ static int empress_streamoff(struct file *file, void *priv, static int empress_s_ext_ctrls(struct file *file, void *priv, struct v4l2_ext_controls *ctrls) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; /* count == 0 is abused in saa6752hs.c, so that special case is handled here explicitly. */ @@ -320,8 +316,7 @@ static int empress_s_ext_ctrls(struct file *file, void *priv, static int empress_g_ext_ctrls(struct file *file, void *priv, struct v4l2_ext_controls *ctrls) { - struct saa7134_fh *fh = priv; - struct saa7134_dev *dev = fh->dev; + struct saa7134_dev *dev = file->private_data; if (ctrls->ctrl_class != V4L2_CTRL_CLASS_MPEG) return -EINVAL; diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c index 919632b10aae..76e6501d238d 100644 --- a/drivers/media/video/saa7134/saa7134-input.c +++ b/drivers/media/video/saa7134/saa7134-input.c @@ -323,6 +323,15 @@ int saa7134_input_init1(struct saa7134_dev *dev) saa_setb(SAA7134_GPIO_GPMODE1, 0x1); saa_setb(SAA7134_GPIO_GPSTATUS1, 0x1); break; + case SAA7134_BOARD_AVERMEDIA_A16D: + ir_codes = ir_codes_avermedia_a16d; + mask_keycode = 0x02F200; + mask_keydown = 0x000400; + polling = 50; /* ms */ + /* Without this we won't receive key up events */ + saa_setb(SAA7134_GPIO_GPMODE1, 0x1); + saa_setb(SAA7134_GPIO_GPSTATUS1, 0x1); + break; case SAA7134_BOARD_KWORLD_TERMINATOR: ir_codes = ir_codes_pixelview; mask_keycode = 0x00001f; diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c index a1b92446c8b4..d015bfe00950 100644 --- a/drivers/media/video/soc_camera.c +++ b/drivers/media/video/soc_camera.c @@ -763,15 +763,6 @@ static struct device_driver ic_drv = { .owner = THIS_MODULE, }; -/* - * Image capture host - this is a host device, not a bus device, so, - * no bus reference, no probing. - */ -static struct class soc_camera_host_class = { - .owner = THIS_MODULE, - .name = "camera_host", -}; - static void dummy_release(struct device *dev) { } @@ -801,7 +792,6 @@ int soc_camera_host_register(struct soc_camera_host *ici) /* Number might be equal to the platform device ID */ sprintf(ici->dev.bus_id, "camera_host%d", ici->nr); - ici->dev.class = &soc_camera_host_class; mutex_lock(&list_lock); list_for_each_entry(ix, &hosts, list) { @@ -1003,14 +993,9 @@ static int __init soc_camera_init(void) ret = driver_register(&ic_drv); if (ret) goto edrvr; - ret = class_register(&soc_camera_host_class); - if (ret) - goto eclr; return 0; -eclr: - driver_unregister(&ic_drv); edrvr: bus_unregister(&soc_camera_bus_type); return ret; @@ -1018,7 +1003,6 @@ edrvr: static void __exit soc_camera_exit(void) { - class_unregister(&soc_camera_host_class); driver_unregister(&ic_drv); bus_unregister(&soc_camera_bus_type); } diff --git a/drivers/media/video/uvc/Makefile b/drivers/media/video/uvc/Makefile new file mode 100644 index 000000000000..968c1994eda0 --- /dev/null +++ b/drivers/media/video/uvc/Makefile @@ -0,0 +1,3 @@ +uvcvideo-objs := uvc_driver.o uvc_queue.o uvc_v4l2.o uvc_video.o uvc_ctrl.o \ + uvc_status.o uvc_isight.o +obj-$(CONFIG_USB_VIDEO_CLASS) += uvcvideo.o diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c new file mode 100644 index 000000000000..f0ee46d15540 --- /dev/null +++ b/drivers/media/video/uvc/uvc_ctrl.c @@ -0,0 +1,1256 @@ +/* + * uvc_ctrl.c -- USB Video Class driver - Controls + * + * Copyright (C) 2005-2008 + * Laurent Pinchart ([email protected]) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/usb.h> +#include <linux/videodev2.h> +#include <linux/vmalloc.h> +#include <linux/wait.h> +#include <asm/atomic.h> + +#include "uvcvideo.h" + +#define UVC_CTRL_NDATA 2 +#define UVC_CTRL_DATA_CURRENT 0 +#define UVC_CTRL_DATA_BACKUP 1 + +/* ------------------------------------------------------------------------ + * Control, formats, ... + */ + +static struct uvc_control_info uvc_ctrls[] = { + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_BRIGHTNESS_CONTROL, + .index = 0, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_CONTRAST_CONTROL, + .index = 1, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_HUE_CONTROL, + .index = 2, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE | UVC_CONTROL_AUTO_UPDATE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_SATURATION_CONTROL, + .index = 3, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_SHARPNESS_CONTROL, + .index = 4, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_GAMMA_CONTROL, + .index = 5, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_BACKLIGHT_COMPENSATION_CONTROL, + .index = 8, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_GAIN_CONTROL, + .index = 9, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_POWER_LINE_FREQUENCY_CONTROL, + .index = 10, + .size = 1, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_HUE_AUTO_CONTROL, + .index = 11, + .size = 1, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_CUR + | UVC_CONTROL_GET_DEF | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_AE_MODE_CONTROL, + .index = 1, + .size = 1, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_CUR + | UVC_CONTROL_GET_DEF | UVC_CONTROL_GET_RES + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_AE_PRIORITY_CONTROL, + .index = 2, + .size = 1, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_CUR + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_EXPOSURE_TIME_ABSOLUTE_CONTROL, + .index = 3, + .size = 4, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_FOCUS_ABSOLUTE_CONTROL, + .index = 5, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE | UVC_CONTROL_AUTO_UPDATE, + }, + { + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_FOCUS_AUTO_CONTROL, + .index = 17, + .size = 1, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_CUR + | UVC_CONTROL_GET_DEF | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_TEMPERATURE_AUTO_CONTROL, + .index = 12, + .size = 1, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_CUR + | UVC_CONTROL_GET_DEF | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_TEMPERATURE_CONTROL, + .index = 6, + .size = 2, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE | UVC_CONTROL_AUTO_UPDATE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_COMPONENT_AUTO_CONTROL, + .index = 13, + .size = 1, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_CUR + | UVC_CONTROL_GET_DEF | UVC_CONTROL_RESTORE, + }, + { + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_COMPONENT_CONTROL, + .index = 7, + .size = 4, + .flags = UVC_CONTROL_SET_CUR | UVC_CONTROL_GET_RANGE + | UVC_CONTROL_RESTORE | UVC_CONTROL_AUTO_UPDATE, + }, +}; + +static struct uvc_menu_info power_line_frequency_controls[] = { + { 0, "Disabled" }, + { 1, "50 Hz" }, + { 2, "60 Hz" }, +}; + +static struct uvc_menu_info exposure_auto_controls[] = { + { 1, "Manual Mode" }, + { 2, "Auto Mode" }, + { 4, "Shutter Priority Mode" }, + { 8, "Aperture Priority Mode" }, +}; + +static struct uvc_control_mapping uvc_ctrl_mappings[] = { + { + .id = V4L2_CID_BRIGHTNESS, + .name = "Brightness", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_BRIGHTNESS_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_SIGNED, + }, + { + .id = V4L2_CID_CONTRAST, + .name = "Contrast", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_CONTRAST_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_HUE, + .name = "Hue", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_HUE_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_SIGNED, + }, + { + .id = V4L2_CID_SATURATION, + .name = "Saturation", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_SATURATION_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_SHARPNESS, + .name = "Sharpness", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_SHARPNESS_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_GAMMA, + .name = "Gamma", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_GAMMA_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_BACKLIGHT_COMPENSATION, + .name = "Backlight Compensation", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_BACKLIGHT_COMPENSATION_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_GAIN, + .name = "Gain", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_GAIN_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_POWER_LINE_FREQUENCY, + .name = "Power Line Frequency", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_POWER_LINE_FREQUENCY_CONTROL, + .size = 2, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_MENU, + .data_type = UVC_CTRL_DATA_TYPE_ENUM, + .menu_info = power_line_frequency_controls, + .menu_count = ARRAY_SIZE(power_line_frequency_controls), + }, + { + .id = V4L2_CID_HUE_AUTO, + .name = "Hue, Auto", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_HUE_AUTO_CONTROL, + .size = 1, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_BOOLEAN, + .data_type = UVC_CTRL_DATA_TYPE_BOOLEAN, + }, + { + .id = V4L2_CID_EXPOSURE_AUTO, + .name = "Exposure, Auto", + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_AE_MODE_CONTROL, + .size = 4, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_MENU, + .data_type = UVC_CTRL_DATA_TYPE_BITMASK, + .menu_info = exposure_auto_controls, + .menu_count = ARRAY_SIZE(exposure_auto_controls), + }, + { + .id = V4L2_CID_EXPOSURE_AUTO_PRIORITY, + .name = "Exposure, Auto Priority", + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_AE_PRIORITY_CONTROL, + .size = 1, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_BOOLEAN, + .data_type = UVC_CTRL_DATA_TYPE_BOOLEAN, + }, + { + .id = V4L2_CID_EXPOSURE_ABSOLUTE, + .name = "Exposure (Absolute)", + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_EXPOSURE_TIME_ABSOLUTE_CONTROL, + .size = 32, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_AUTO_WHITE_BALANCE, + .name = "White Balance Temperature, Auto", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_TEMPERATURE_AUTO_CONTROL, + .size = 1, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_BOOLEAN, + .data_type = UVC_CTRL_DATA_TYPE_BOOLEAN, + }, + { + .id = V4L2_CID_WHITE_BALANCE_TEMPERATURE, + .name = "White Balance Temperature", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_TEMPERATURE_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_AUTO_WHITE_BALANCE, + .name = "White Balance Component, Auto", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_COMPONENT_AUTO_CONTROL, + .size = 1, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_BOOLEAN, + .data_type = UVC_CTRL_DATA_TYPE_BOOLEAN, + }, + { + .id = V4L2_CID_BLUE_BALANCE, + .name = "White Balance Blue Component", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_COMPONENT_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_SIGNED, + }, + { + .id = V4L2_CID_RED_BALANCE, + .name = "White Balance Red Component", + .entity = UVC_GUID_UVC_PROCESSING, + .selector = PU_WHITE_BALANCE_COMPONENT_CONTROL, + .size = 16, + .offset = 16, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_SIGNED, + }, + { + .id = V4L2_CID_FOCUS_ABSOLUTE, + .name = "Focus (absolute)", + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_FOCUS_ABSOLUTE_CONTROL, + .size = 16, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_INTEGER, + .data_type = UVC_CTRL_DATA_TYPE_UNSIGNED, + }, + { + .id = V4L2_CID_FOCUS_AUTO, + .name = "Focus, Auto", + .entity = UVC_GUID_UVC_CAMERA, + .selector = CT_FOCUS_AUTO_CONTROL, + .size = 1, + .offset = 0, + .v4l2_type = V4L2_CTRL_TYPE_BOOLEAN, + .data_type = UVC_CTRL_DATA_TYPE_BOOLEAN, + }, +}; + +/* ------------------------------------------------------------------------ + * Utility functions + */ + +static inline __u8 *uvc_ctrl_data(struct uvc_control *ctrl, int id) +{ + return ctrl->data + id * ctrl->info->size; +} + +static inline int uvc_get_bit(const __u8 *data, int bit) +{ + return (data[bit >> 3] >> (bit & 7)) & 1; +} + +/* Extract the bit string specified by mapping->offset and mapping->size + * from the little-endian data stored at 'data' and return the result as + * a signed 32bit integer. Sign extension will be performed if the mapping + * references a signed data type. + */ +static __s32 uvc_get_le_value(const __u8 *data, + struct uvc_control_mapping *mapping) +{ + int bits = mapping->size; + int offset = mapping->offset; + __s32 value = 0; + __u8 mask; + + data += offset / 8; + offset &= 7; + mask = ((1LL << bits) - 1) << offset; + + for (; bits > 0; data++) { + __u8 byte = *data & mask; + value |= offset > 0 ? (byte >> offset) : (byte << (-offset)); + bits -= 8 - (offset > 0 ? offset : 0); + offset -= 8; + mask = (1 << bits) - 1; + } + + /* Sign-extend the value if needed */ + if (mapping->data_type == UVC_CTRL_DATA_TYPE_SIGNED) + value |= -(value & (1 << (mapping->size - 1))); + + return value; +} + +/* Set the bit string specified by mapping->offset and mapping->size + * in the little-endian data stored at 'data' to the value 'value'. + */ +static void uvc_set_le_value(__s32 value, __u8 *data, + struct uvc_control_mapping *mapping) +{ + int bits = mapping->size; + int offset = mapping->offset; + __u8 mask; + + data += offset / 8; + offset &= 7; + + for (; bits > 0; data++) { + mask = ((1LL << bits) - 1) << offset; + *data = (*data & ~mask) | ((value << offset) & mask); + value >>= offset ? offset : 8; + bits -= 8 - offset; + offset = 0; + } +} + +/* ------------------------------------------------------------------------ + * Terminal and unit management + */ + +static const __u8 uvc_processing_guid[16] = UVC_GUID_UVC_PROCESSING; +static const __u8 uvc_camera_guid[16] = UVC_GUID_UVC_CAMERA; +static const __u8 uvc_media_transport_input_guid[16] = + UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT; + +static int uvc_entity_match_guid(struct uvc_entity *entity, __u8 guid[16]) +{ + switch (UVC_ENTITY_TYPE(entity)) { + case ITT_CAMERA: + return memcmp(uvc_camera_guid, guid, 16) == 0; + + case ITT_MEDIA_TRANSPORT_INPUT: + return memcmp(uvc_media_transport_input_guid, guid, 16) == 0; + + case VC_PROCESSING_UNIT: + return memcmp(uvc_processing_guid, guid, 16) == 0; + + case VC_EXTENSION_UNIT: + return memcmp(entity->extension.guidExtensionCode, + guid, 16) == 0; + + default: + return 0; + } +} + +/* ------------------------------------------------------------------------ + * UVC Controls + */ + +static void __uvc_find_control(struct uvc_entity *entity, __u32 v4l2_id, + struct uvc_control_mapping **mapping, struct uvc_control **control, + int next) +{ + struct uvc_control *ctrl; + struct uvc_control_mapping *map; + unsigned int i; + + if (entity == NULL) + return; + + for (i = 0; i < entity->ncontrols; ++i) { + ctrl = &entity->controls[i]; + if (ctrl->info == NULL) + continue; + + list_for_each_entry(map, &ctrl->info->mappings, list) { + if ((map->id == v4l2_id) && !next) { + *control = ctrl; + *mapping = map; + return; + } + + if ((*mapping == NULL || (*mapping)->id > map->id) && + (map->id > v4l2_id) && next) { + *control = ctrl; + *mapping = map; + } + } + } +} + +struct uvc_control *uvc_find_control(struct uvc_video_device *video, + __u32 v4l2_id, struct uvc_control_mapping **mapping) +{ + struct uvc_control *ctrl = NULL; + struct uvc_entity *entity; + int next = v4l2_id & V4L2_CTRL_FLAG_NEXT_CTRL; + + *mapping = NULL; + + /* Mask the query flags. */ + v4l2_id &= V4L2_CTRL_ID_MASK; + + /* Find the control. */ + __uvc_find_control(video->processing, v4l2_id, mapping, &ctrl, next); + if (ctrl && !next) + return ctrl; + + list_for_each_entry(entity, &video->iterms, chain) { + __uvc_find_control(entity, v4l2_id, mapping, &ctrl, next); + if (ctrl && !next) + return ctrl; + } + + list_for_each_entry(entity, &video->extensions, chain) { + __uvc_find_control(entity, v4l2_id, mapping, &ctrl, next); + if (ctrl && !next) + return ctrl; + } + + if (ctrl == NULL && !next) + uvc_trace(UVC_TRACE_CONTROL, "Control 0x%08x not found.\n", + v4l2_id); + + return ctrl; +} + +int uvc_query_v4l2_ctrl(struct uvc_video_device *video, + struct v4l2_queryctrl *v4l2_ctrl) +{ + struct uvc_control *ctrl; + struct uvc_control_mapping *mapping; + struct uvc_menu_info *menu; + unsigned int i; + __u8 data[8]; + int ret; + + ctrl = uvc_find_control(video, v4l2_ctrl->id, &mapping); + if (ctrl == NULL) + return -EINVAL; + + v4l2_ctrl->id = mapping->id; + v4l2_ctrl->type = mapping->v4l2_type; + strncpy(v4l2_ctrl->name, mapping->name, sizeof v4l2_ctrl->name); + v4l2_ctrl->flags = 0; + + if (!(ctrl->info->flags & UVC_CONTROL_SET_CUR)) + v4l2_ctrl->flags |= V4L2_CTRL_FLAG_READ_ONLY; + + if (ctrl->info->flags & UVC_CONTROL_GET_DEF) { + if ((ret = uvc_query_ctrl(video->dev, GET_DEF, ctrl->entity->id, + video->dev->intfnum, ctrl->info->selector, + &data, ctrl->info->size)) < 0) + return ret; + v4l2_ctrl->default_value = uvc_get_le_value(data, mapping); + } + + if (mapping->v4l2_type == V4L2_CTRL_TYPE_MENU) { + v4l2_ctrl->minimum = 0; + v4l2_ctrl->maximum = mapping->menu_count - 1; + v4l2_ctrl->step = 1; + + menu = mapping->menu_info; + for (i = 0; i < mapping->menu_count; ++i, ++menu) { + if (menu->value == v4l2_ctrl->default_value) { + v4l2_ctrl->default_value = i; + break; + } + } + + return 0; + } + + if (ctrl->info->flags & UVC_CONTROL_GET_MIN) { + if ((ret = uvc_query_ctrl(video->dev, GET_MIN, ctrl->entity->id, + video->dev->intfnum, ctrl->info->selector, + &data, ctrl->info->size)) < 0) + return ret; + v4l2_ctrl->minimum = uvc_get_le_value(data, mapping); + } + if (ctrl->info->flags & UVC_CONTROL_GET_MAX) { + if ((ret = uvc_query_ctrl(video->dev, GET_MAX, ctrl->entity->id, + video->dev->intfnum, ctrl->info->selector, + &data, ctrl->info->size)) < 0) + return ret; + v4l2_ctrl->maximum = uvc_get_le_value(data, mapping); + } + if (ctrl->info->flags & UVC_CONTROL_GET_RES) { + if ((ret = uvc_query_ctrl(video->dev, GET_RES, ctrl->entity->id, + video->dev->intfnum, ctrl->info->selector, + &data, ctrl->info->size)) < 0) + return ret; + v4l2_ctrl->step = uvc_get_le_value(data, mapping); + } + + return 0; +} + + +/* -------------------------------------------------------------------------- + * Control transactions + * + * To make extended set operations as atomic as the hardware allows, controls + * are handled using begin/commit/rollback operations. + * + * At the beginning of a set request, uvc_ctrl_begin should be called to + * initialize the request. This function acquires the control lock. + * + * When setting a control, the new value is stored in the control data field + * at position UVC_CTRL_DATA_CURRENT. The control is then marked as dirty for + * later processing. If the UVC and V4L2 control sizes differ, the current + * value is loaded from the hardware before storing the new value in the data + * field. + * + * After processing all controls in the transaction, uvc_ctrl_commit or + * uvc_ctrl_rollback must be called to apply the pending changes to the + * hardware or revert them. When applying changes, all controls marked as + * dirty will be modified in the UVC device, and the dirty flag will be + * cleared. When reverting controls, the control data field + * UVC_CTRL_DATA_CURRENT is reverted to its previous value + * (UVC_CTRL_DATA_BACKUP) for all dirty controls. Both functions release the + * control lock. + */ +int uvc_ctrl_begin(struct uvc_video_device *video) +{ + return mutex_lock_interruptible(&video->ctrl_mutex) ? -ERESTARTSYS : 0; +} + +static int uvc_ctrl_commit_entity(struct uvc_device *dev, + struct uvc_entity *entity, int rollback) +{ + struct uvc_control *ctrl; + unsigned int i; + int ret; + + if (entity == NULL) + return 0; + + for (i = 0; i < entity->ncontrols; ++i) { + ctrl = &entity->controls[i]; + if (ctrl->info == NULL || !ctrl->dirty) + continue; + + if (!rollback) + ret = uvc_query_ctrl(dev, SET_CUR, ctrl->entity->id, + dev->intfnum, ctrl->info->selector, + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + ctrl->info->size); + else + ret = 0; + + if (rollback || ret < 0) + memcpy(uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_BACKUP), + ctrl->info->size); + + if ((ctrl->info->flags & UVC_CONTROL_GET_CUR) == 0) + ctrl->loaded = 0; + + ctrl->dirty = 0; + + if (ret < 0) + return ret; + } + + return 0; +} + +int __uvc_ctrl_commit(struct uvc_video_device *video, int rollback) +{ + struct uvc_entity *entity; + int ret = 0; + + /* Find the control. */ + ret = uvc_ctrl_commit_entity(video->dev, video->processing, rollback); + if (ret < 0) + goto done; + + list_for_each_entry(entity, &video->iterms, chain) { + ret = uvc_ctrl_commit_entity(video->dev, entity, rollback); + if (ret < 0) + goto done; + } + + list_for_each_entry(entity, &video->extensions, chain) { + ret = uvc_ctrl_commit_entity(video->dev, entity, rollback); + if (ret < 0) + goto done; + } + +done: + mutex_unlock(&video->ctrl_mutex); + return ret; +} + +int uvc_ctrl_get(struct uvc_video_device *video, + struct v4l2_ext_control *xctrl) +{ + struct uvc_control *ctrl; + struct uvc_control_mapping *mapping; + struct uvc_menu_info *menu; + unsigned int i; + int ret; + + ctrl = uvc_find_control(video, xctrl->id, &mapping); + if (ctrl == NULL || (ctrl->info->flags & UVC_CONTROL_GET_CUR) == 0) + return -EINVAL; + + if (!ctrl->loaded) { + ret = uvc_query_ctrl(video->dev, GET_CUR, ctrl->entity->id, + video->dev->intfnum, ctrl->info->selector, + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + ctrl->info->size); + if (ret < 0) + return ret; + + if ((ctrl->info->flags & UVC_CONTROL_AUTO_UPDATE) == 0) + ctrl->loaded = 1; + } + + xctrl->value = uvc_get_le_value( + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), mapping); + + if (mapping->v4l2_type == V4L2_CTRL_TYPE_MENU) { + menu = mapping->menu_info; + for (i = 0; i < mapping->menu_count; ++i, ++menu) { + if (menu->value == xctrl->value) { + xctrl->value = i; + break; + } + } + } + + return 0; +} + +int uvc_ctrl_set(struct uvc_video_device *video, + struct v4l2_ext_control *xctrl) +{ + struct uvc_control *ctrl; + struct uvc_control_mapping *mapping; + s32 value = xctrl->value; + int ret; + + ctrl = uvc_find_control(video, xctrl->id, &mapping); + if (ctrl == NULL || (ctrl->info->flags & UVC_CONTROL_SET_CUR) == 0) + return -EINVAL; + + if (mapping->v4l2_type == V4L2_CTRL_TYPE_MENU) { + if (value < 0 || value >= mapping->menu_count) + return -EINVAL; + value = mapping->menu_info[value].value; + } + + if (!ctrl->loaded && (ctrl->info->size * 8) != mapping->size) { + if ((ctrl->info->flags & UVC_CONTROL_GET_CUR) == 0) { + memset(uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + 0, ctrl->info->size); + } else { + ret = uvc_query_ctrl(video->dev, GET_CUR, + ctrl->entity->id, video->dev->intfnum, + ctrl->info->selector, + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + ctrl->info->size); + if (ret < 0) + return ret; + } + + if ((ctrl->info->flags & UVC_CONTROL_AUTO_UPDATE) == 0) + ctrl->loaded = 1; + } + + if (!ctrl->dirty) { + memcpy(uvc_ctrl_data(ctrl, UVC_CTRL_DATA_BACKUP), + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + ctrl->info->size); + } + + uvc_set_le_value(value, + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), mapping); + + ctrl->dirty = 1; + ctrl->modified = 1; + return 0; +} + +/* -------------------------------------------------------------------------- + * Dynamic controls + */ + +int uvc_xu_ctrl_query(struct uvc_video_device *video, + struct uvc_xu_control *xctrl, int set) +{ + struct uvc_entity *entity; + struct uvc_control *ctrl = NULL; + unsigned int i, found = 0; + __u8 *data; + int ret; + + /* Find the extension unit. */ + list_for_each_entry(entity, &video->extensions, chain) { + if (entity->id == xctrl->unit) + break; + } + + if (entity->id != xctrl->unit) { + uvc_trace(UVC_TRACE_CONTROL, "Extension unit %u not found.\n", + xctrl->unit); + return -EINVAL; + } + + /* Find the control. */ + for (i = 0; i < entity->ncontrols; ++i) { + ctrl = &entity->controls[i]; + if (ctrl->info == NULL) + continue; + + if (ctrl->info->selector == xctrl->selector) { + found = 1; + break; + } + } + + if (!found) { + uvc_trace(UVC_TRACE_CONTROL, + "Control " UVC_GUID_FORMAT "/%u not found.\n", + UVC_GUID_ARGS(entity->extension.guidExtensionCode), + xctrl->selector); + return -EINVAL; + } + + /* Validate control data size. */ + if (ctrl->info->size != xctrl->size) + return -EINVAL; + + if ((set && !(ctrl->info->flags & UVC_CONTROL_SET_CUR)) || + (!set && !(ctrl->info->flags & UVC_CONTROL_GET_CUR))) + return -EINVAL; + + if (mutex_lock_interruptible(&video->ctrl_mutex)) + return -ERESTARTSYS; + + memcpy(uvc_ctrl_data(ctrl, UVC_CTRL_DATA_BACKUP), + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + xctrl->size); + data = uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT); + + if (set && copy_from_user(data, xctrl->data, xctrl->size)) { + ret = -EFAULT; + goto out; + } + + ret = uvc_query_ctrl(video->dev, set ? SET_CUR : GET_CUR, xctrl->unit, + video->dev->intfnum, xctrl->selector, data, + xctrl->size); + if (ret < 0) + goto out; + + if (!set && copy_to_user(xctrl->data, data, xctrl->size)) { + ret = -EFAULT; + goto out; + } + +out: + if (ret) + memcpy(uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT), + uvc_ctrl_data(ctrl, UVC_CTRL_DATA_BACKUP), + xctrl->size); + + mutex_unlock(&video->ctrl_mutex); + return ret; +} + +/* -------------------------------------------------------------------------- + * Suspend/resume + */ + +/* + * Restore control values after resume, skipping controls that haven't been + * changed. + * + * TODO + * - Don't restore modified controls that are back to their default value. + * - Handle restore order (Auto-Exposure Mode should be restored before + * Exposure Time). + */ +int uvc_ctrl_resume_device(struct uvc_device *dev) +{ + struct uvc_control *ctrl; + struct uvc_entity *entity; + unsigned int i; + int ret; + + /* Walk the entities list and restore controls when possible. */ + list_for_each_entry(entity, &dev->entities, list) { + + for (i = 0; i < entity->ncontrols; ++i) { + ctrl = &entity->controls[i]; + + if (ctrl->info == NULL || !ctrl->modified || + (ctrl->info->flags & UVC_CONTROL_RESTORE) == 0) + continue; + + printk(KERN_INFO "restoring control " UVC_GUID_FORMAT + "/%u/%u\n", UVC_GUID_ARGS(ctrl->info->entity), + ctrl->info->index, ctrl->info->selector); + ctrl->dirty = 1; + } + + ret = uvc_ctrl_commit_entity(dev, entity, 0); + if (ret < 0) + return ret; + } + + return 0; +} + +/* -------------------------------------------------------------------------- + * Control and mapping handling + */ + +static void uvc_ctrl_add_ctrl(struct uvc_device *dev, + struct uvc_control_info *info) +{ + struct uvc_entity *entity; + struct uvc_control *ctrl = NULL; + int ret, found = 0; + unsigned int i; + + list_for_each_entry(entity, &dev->entities, list) { + if (!uvc_entity_match_guid(entity, info->entity)) + continue; + + for (i = 0; i < entity->ncontrols; ++i) { + ctrl = &entity->controls[i]; + if (ctrl->index == info->index) { + found = 1; + break; + } + } + + if (found) + break; + } + + if (!found) + return; + + if (UVC_ENTITY_TYPE(entity) == VC_EXTENSION_UNIT) { + /* Check if the device control information and length match + * the user supplied information. + */ + __u32 flags; + __le16 size; + __u8 inf; + + if ((ret = uvc_query_ctrl(dev, GET_LEN, ctrl->entity->id, + dev->intfnum, info->selector, (__u8 *)&size, 2)) < 0) { + uvc_trace(UVC_TRACE_CONTROL, "GET_LEN failed on " + "control " UVC_GUID_FORMAT "/%u (%d).\n", + UVC_GUID_ARGS(info->entity), info->selector, + ret); + return; + } + + if (info->size != le16_to_cpu(size)) { + uvc_trace(UVC_TRACE_CONTROL, "Control " UVC_GUID_FORMAT + "/%u size doesn't match user supplied " + "value.\n", UVC_GUID_ARGS(info->entity), + info->selector); + return; + } + + if ((ret = uvc_query_ctrl(dev, GET_INFO, ctrl->entity->id, + dev->intfnum, info->selector, &inf, 1)) < 0) { + uvc_trace(UVC_TRACE_CONTROL, "GET_INFO failed on " + "control " UVC_GUID_FORMAT "/%u (%d).\n", + UVC_GUID_ARGS(info->entity), info->selector, + ret); + return; + } + + flags = info->flags; + if (((flags & UVC_CONTROL_GET_CUR) && !(inf & (1 << 0))) || + ((flags & UVC_CONTROL_SET_CUR) && !(inf & (1 << 1)))) { + uvc_trace(UVC_TRACE_CONTROL, "Control " + UVC_GUID_FORMAT "/%u flags don't match " + "supported operations.\n", + UVC_GUID_ARGS(info->entity), info->selector); + return; + } + } + + ctrl->info = info; + ctrl->data = kmalloc(ctrl->info->size * UVC_CTRL_NDATA, GFP_KERNEL); + uvc_trace(UVC_TRACE_CONTROL, "Added control " UVC_GUID_FORMAT "/%u " + "to device %s entity %u\n", UVC_GUID_ARGS(ctrl->info->entity), + ctrl->info->selector, dev->udev->devpath, entity->id); +} + +/* + * Add an item to the UVC control information list, and instantiate a control + * structure for each device that supports the control. + */ +int uvc_ctrl_add_info(struct uvc_control_info *info) +{ + struct uvc_control_info *ctrl; + struct uvc_device *dev; + int ret = 0; + + /* Find matching controls by walking the devices, entities and + * controls list. + */ + mutex_lock(&uvc_driver.ctrl_mutex); + + /* First check if the list contains a control matching the new one. + * Bail out if it does. + */ + list_for_each_entry(ctrl, &uvc_driver.controls, list) { + if (memcmp(ctrl->entity, info->entity, 16)) + continue; + + if (ctrl->selector == info->selector) { + uvc_trace(UVC_TRACE_CONTROL, "Control " + UVC_GUID_FORMAT "/%u is already defined.\n", + UVC_GUID_ARGS(info->entity), info->selector); + ret = -EEXIST; + goto end; + } + if (ctrl->index == info->index) { + uvc_trace(UVC_TRACE_CONTROL, "Control " + UVC_GUID_FORMAT "/%u would overwrite index " + "%d.\n", UVC_GUID_ARGS(info->entity), + info->selector, info->index); + ret = -EEXIST; + goto end; + } + } + + list_for_each_entry(dev, &uvc_driver.devices, list) + uvc_ctrl_add_ctrl(dev, info); + + INIT_LIST_HEAD(&info->mappings); + list_add_tail(&info->list, &uvc_driver.controls); +end: + mutex_unlock(&uvc_driver.ctrl_mutex); + return ret; +} + +int uvc_ctrl_add_mapping(struct uvc_control_mapping *mapping) +{ + struct uvc_control_info *info; + struct uvc_control_mapping *map; + int ret = -EINVAL; + + if (mapping->id & ~V4L2_CTRL_ID_MASK) { + uvc_trace(UVC_TRACE_CONTROL, "Can't add mapping '%s' with " + "invalid control id 0x%08x\n", mapping->name, + mapping->id); + return -EINVAL; + } + + mutex_lock(&uvc_driver.ctrl_mutex); + list_for_each_entry(info, &uvc_driver.controls, list) { + if (memcmp(info->entity, mapping->entity, 16) || + info->selector != mapping->selector) + continue; + + if (info->size * 8 < mapping->size + mapping->offset) { + uvc_trace(UVC_TRACE_CONTROL, "Mapping '%s' would " + "overflow control " UVC_GUID_FORMAT "/%u\n", + mapping->name, UVC_GUID_ARGS(info->entity), + info->selector); + ret = -EOVERFLOW; + goto end; + } + + /* Check if the list contains a mapping matching the new one. + * Bail out if it does. + */ + list_for_each_entry(map, &info->mappings, list) { + if (map->id == mapping->id) { + uvc_trace(UVC_TRACE_CONTROL, "Mapping '%s' is " + "already defined.\n", mapping->name); + ret = -EEXIST; + goto end; + } + } + + mapping->ctrl = info; + list_add_tail(&mapping->list, &info->mappings); + uvc_trace(UVC_TRACE_CONTROL, "Adding mapping %s to control " + UVC_GUID_FORMAT "/%u.\n", mapping->name, + UVC_GUID_ARGS(info->entity), info->selector); + + ret = 0; + break; + } +end: + mutex_unlock(&uvc_driver.ctrl_mutex); + return ret; +} + +/* + * Initialize device controls. + */ +int uvc_ctrl_init_device(struct uvc_device *dev) +{ + struct uvc_control_info *info; + struct uvc_control *ctrl; + struct uvc_entity *entity; + unsigned int i; + + /* Walk the entities list and instantiate controls */ + list_for_each_entry(entity, &dev->entities, list) { + unsigned int bControlSize = 0, ncontrols = 0; + __u8 *bmControls = NULL; + + if (UVC_ENTITY_TYPE(entity) == VC_EXTENSION_UNIT) { + bmControls = entity->extension.bmControls; + bControlSize = entity->extension.bControlSize; + } else if (UVC_ENTITY_TYPE(entity) == VC_PROCESSING_UNIT) { + bmControls = entity->processing.bmControls; + bControlSize = entity->processing.bControlSize; + } else if (UVC_ENTITY_TYPE(entity) == ITT_CAMERA) { + bmControls = entity->camera.bmControls; + bControlSize = entity->camera.bControlSize; + } + + for (i = 0; i < bControlSize; ++i) + ncontrols += hweight8(bmControls[i]); + + if (ncontrols == 0) + continue; + + entity->controls = kzalloc(ncontrols*sizeof *ctrl, GFP_KERNEL); + if (entity->controls == NULL) + return -ENOMEM; + + entity->ncontrols = ncontrols; + + ctrl = entity->controls; + for (i = 0; i < bControlSize * 8; ++i) { + if (uvc_get_bit(bmControls, i) == 0) + continue; + + ctrl->entity = entity; + ctrl->index = i; + ctrl++; + } + } + + /* Walk the controls info list and associate them with the device + * controls, then add the device to the global device list. This has + * to be done while holding the controls lock, to make sure + * uvc_ctrl_add_info() will not get called in-between. + */ + mutex_lock(&uvc_driver.ctrl_mutex); + list_for_each_entry(info, &uvc_driver.controls, list) + uvc_ctrl_add_ctrl(dev, info); + + list_add_tail(&dev->list, &uvc_driver.devices); + mutex_unlock(&uvc_driver.ctrl_mutex); + + return 0; +} + +/* + * Cleanup device controls. + */ +void uvc_ctrl_cleanup_device(struct uvc_device *dev) +{ + struct uvc_entity *entity; + unsigned int i; + + /* Remove the device from the global devices list */ + mutex_lock(&uvc_driver.ctrl_mutex); + if (dev->list.next != NULL) + list_del(&dev->list); + mutex_unlock(&uvc_driver.ctrl_mutex); + + list_for_each_entry(entity, &dev->entities, list) { + for (i = 0; i < entity->ncontrols; ++i) + kfree(entity->controls[i].data); + + kfree(entity->controls); + } +} + +void uvc_ctrl_init(void) +{ + struct uvc_control_info *ctrl = uvc_ctrls; + struct uvc_control_info *cend = ctrl + ARRAY_SIZE(uvc_ctrls); + struct uvc_control_mapping *mapping = uvc_ctrl_mappings; + struct uvc_control_mapping *mend = + mapping + ARRAY_SIZE(uvc_ctrl_mappings); + + for (; ctrl < cend; ++ctrl) + uvc_ctrl_add_info(ctrl); + + for (; mapping < mend; ++mapping) + uvc_ctrl_add_mapping(mapping); +} diff --git a/drivers/media/video/uvc/uvc_driver.c b/drivers/media/video/uvc/uvc_driver.c new file mode 100644 index 000000000000..60ced589f898 --- /dev/null +++ b/drivers/media/video/uvc/uvc_driver.c @@ -0,0 +1,1955 @@ +/* + * uvc_driver.c -- USB Video Class driver + * + * Copyright (C) 2005-2008 + * Laurent Pinchart ([email protected]) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +/* + * This driver aims to support video input devices compliant with the 'USB + * Video Class' specification. + * + * The driver doesn't support the deprecated v4l1 interface. It implements the + * mmap capture method only, and doesn't do any image format conversion in + * software. If your user-space application doesn't support YUYV or MJPEG, fix + * it :-). Please note that the MJPEG data have been stripped from their + * Huffman tables (DHT marker), you will need to add it back if your JPEG + * codec can't handle MJPEG data. + */ + +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/usb.h> +#include <linux/videodev2.h> +#include <linux/vmalloc.h> +#include <linux/wait.h> +#include <asm/atomic.h> + +#include <media/v4l2-common.h> + +#include "uvcvideo.h" + +#define DRIVER_AUTHOR "Laurent Pinchart <[email protected]>" +#define DRIVER_DESC "USB Video Class driver" +#ifndef DRIVER_VERSION +#define DRIVER_VERSION "v0.1.0" +#endif + +static unsigned int uvc_quirks_param; +unsigned int uvc_trace_param; + +/* ------------------------------------------------------------------------ + * Control, formats, ... + */ + +static struct uvc_format_desc uvc_fmts[] = { + { + .name = "YUV 4:2:2 (YUYV)", + .guid = UVC_GUID_FORMAT_YUY2, + .fcc = V4L2_PIX_FMT_YUYV, + }, + { + .name = "YUV 4:2:0 (NV12)", + .guid = UVC_GUID_FORMAT_NV12, + .fcc = V4L2_PIX_FMT_NV12, + }, + { + .name = "MJPEG", + .guid = UVC_GUID_FORMAT_MJPEG, + .fcc = V4L2_PIX_FMT_MJPEG, + }, + { + .name = "YVU 4:2:0 (YV12)", + .guid = UVC_GUID_FORMAT_YV12, + .fcc = V4L2_PIX_FMT_YVU420, + }, + { + .name = "YUV 4:2:0 (I420)", + .guid = UVC_GUID_FORMAT_I420, + .fcc = V4L2_PIX_FMT_YUV420, + }, + { + .name = "YUV 4:2:2 (UYVY)", + .guid = UVC_GUID_FORMAT_UYVY, + .fcc = V4L2_PIX_FMT_UYVY, + }, + { + .name = "Greyscale", + .guid = UVC_GUID_FORMAT_Y800, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "RGB Bayer", + .guid = UVC_GUID_FORMAT_BY8, + .fcc = V4L2_PIX_FMT_SBGGR8, + }, +}; + +/* ------------------------------------------------------------------------ + * Utility functions + */ + +struct usb_host_endpoint *uvc_find_endpoint(struct usb_host_interface *alts, + __u8 epaddr) +{ + struct usb_host_endpoint *ep; + unsigned int i; + + for (i = 0; i < alts->desc.bNumEndpoints; ++i) { + ep = &alts->endpoint[i]; + if (ep->desc.bEndpointAddress == epaddr) + return ep; + } + + return NULL; +} + +static struct uvc_format_desc *uvc_format_by_guid(const __u8 guid[16]) +{ + unsigned int len = ARRAY_SIZE(uvc_fmts); + unsigned int i; + + for (i = 0; i < len; ++i) { + if (memcmp(guid, uvc_fmts[i].guid, 16) == 0) + return &uvc_fmts[i]; + } + + return NULL; +} + +static __u32 uvc_colorspace(const __u8 primaries) +{ + static const __u8 colorprimaries[] = { + 0, + V4L2_COLORSPACE_SRGB, + V4L2_COLORSPACE_470_SYSTEM_M, + V4L2_COLORSPACE_470_SYSTEM_BG, + V4L2_COLORSPACE_SMPTE170M, + V4L2_COLORSPACE_SMPTE240M, + }; + + if (primaries < ARRAY_SIZE(colorprimaries)) + return colorprimaries[primaries]; + + return 0; +} + +/* Simplify a fraction using a simple continued fraction decomposition. The + * idea here is to convert fractions such as 333333/10000000 to 1/30 using + * 32 bit arithmetic only. The algorithm is not perfect and relies upon two + * arbitrary parameters to remove non-significative terms from the simple + * continued fraction decomposition. Using 8 and 333 for n_terms and threshold + * respectively seems to give nice results. + */ +void uvc_simplify_fraction(uint32_t *numerator, uint32_t *denominator, + unsigned int n_terms, unsigned int threshold) +{ + uint32_t *an; + uint32_t x, y, r; + unsigned int i, n; + + an = kmalloc(n_terms * sizeof *an, GFP_KERNEL); + if (an == NULL) + return; + + /* Convert the fraction to a simple continued fraction. See + * http://mathforum.org/dr.math/faq/faq.fractions.html + * Stop if the current term is bigger than or equal to the given + * threshold. + */ + x = *numerator; + y = *denominator; + + for (n = 0; n < n_terms && y != 0; ++n) { + an[n] = x / y; + if (an[n] >= threshold) { + if (n < 2) + n++; + break; + } + + r = x - an[n] * y; + x = y; + y = r; + } + + /* Expand the simple continued fraction back to an integer fraction. */ + x = 0; + y = 1; + + for (i = n; i > 0; --i) { + r = y; + y = an[i-1] * y + x; + x = r; + } + + *numerator = y; + *denominator = x; + kfree(an); +} + +/* Convert a fraction to a frame interval in 100ns multiples. The idea here is + * to compute numerator / denominator * 10000000 using 32 bit fixed point + * arithmetic only. + */ +uint32_t uvc_fraction_to_interval(uint32_t numerator, uint32_t denominator) +{ + uint32_t multiplier; + + /* Saturate the result if the operation would overflow. */ + if (denominator == 0 || + numerator/denominator >= ((uint32_t)-1)/10000000) + return (uint32_t)-1; + + /* Divide both the denominator and the multiplier by two until + * numerator * multiplier doesn't overflow. If anyone knows a better + * algorithm please let me know. + */ + multiplier = 10000000; + while (numerator > ((uint32_t)-1)/multiplier) { + multiplier /= 2; + denominator /= 2; + } + + return denominator ? numerator * multiplier / denominator : 0; +} + +/* ------------------------------------------------------------------------ + * Terminal and unit management + */ + +static struct uvc_entity *uvc_entity_by_id(struct uvc_device *dev, int id) +{ + struct uvc_entity *entity; + + list_for_each_entry(entity, &dev->entities, list) { + if (entity->id == id) + return entity; + } + + return NULL; +} + +static struct uvc_entity *uvc_entity_by_reference(struct uvc_device *dev, + int id, struct uvc_entity *entity) +{ + unsigned int i; + + if (entity == NULL) + entity = list_entry(&dev->entities, struct uvc_entity, list); + + list_for_each_entry_continue(entity, &dev->entities, list) { + switch (UVC_ENTITY_TYPE(entity)) { + case TT_STREAMING: + if (entity->output.bSourceID == id) + return entity; + break; + + case VC_PROCESSING_UNIT: + if (entity->processing.bSourceID == id) + return entity; + break; + + case VC_SELECTOR_UNIT: + for (i = 0; i < entity->selector.bNrInPins; ++i) + if (entity->selector.baSourceID[i] == id) + return entity; + break; + + case VC_EXTENSION_UNIT: + for (i = 0; i < entity->extension.bNrInPins; ++i) + if (entity->extension.baSourceID[i] == id) + return entity; + break; + } + } + + return NULL; +} + +/* ------------------------------------------------------------------------ + * Descriptors handling + */ + +static int uvc_parse_format(struct uvc_device *dev, + struct uvc_streaming *streaming, struct uvc_format *format, + __u32 **intervals, unsigned char *buffer, int buflen) +{ + struct usb_interface *intf = streaming->intf; + struct usb_host_interface *alts = intf->cur_altsetting; + struct uvc_format_desc *fmtdesc; + struct uvc_frame *frame; + const unsigned char *start = buffer; + unsigned int interval; + unsigned int i, n; + __u8 ftype; + + format->type = buffer[2]; + format->index = buffer[3]; + + switch (buffer[2]) { + case VS_FORMAT_UNCOMPRESSED: + case VS_FORMAT_FRAME_BASED: + if (buflen < 27) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming" + "interface %d FORMAT error\n", + dev->udev->devnum, + alts->desc.bInterfaceNumber); + return -EINVAL; + } + + /* Find the format descriptor from its GUID. */ + fmtdesc = uvc_format_by_guid(&buffer[5]); + + if (fmtdesc != NULL) { + strncpy(format->name, fmtdesc->name, + sizeof format->name); + format->fcc = fmtdesc->fcc; + } else { + uvc_printk(KERN_INFO, "Unknown video format " + UVC_GUID_FORMAT "\n", + UVC_GUID_ARGS(&buffer[5])); + snprintf(format->name, sizeof format->name, + UVC_GUID_FORMAT, UVC_GUID_ARGS(&buffer[5])); + format->fcc = 0; + } + + format->bpp = buffer[21]; + if (buffer[2] == VS_FORMAT_UNCOMPRESSED) { + ftype = VS_FRAME_UNCOMPRESSED; + } else { + ftype = VS_FRAME_FRAME_BASED; + if (buffer[27]) + format->flags = UVC_FMT_FLAG_COMPRESSED; + } + break; + + case VS_FORMAT_MJPEG: + if (buflen < 11) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming" + "interface %d FORMAT error\n", + dev->udev->devnum, + alts->desc.bInterfaceNumber); + return -EINVAL; + } + + strncpy(format->name, "MJPEG", sizeof format->name); + format->fcc = V4L2_PIX_FMT_MJPEG; + format->flags = UVC_FMT_FLAG_COMPRESSED; + format->bpp = 0; + ftype = VS_FRAME_MJPEG; + break; + + case VS_FORMAT_DV: + if (buflen < 9) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming" + "interface %d FORMAT error\n", + dev->udev->devnum, + alts->desc.bInterfaceNumber); + return -EINVAL; + } + + switch (buffer[8] & 0x7f) { + case 0: + strncpy(format->name, "SD-DV", sizeof format->name); + break; + case 1: + strncpy(format->name, "SDL-DV", sizeof format->name); + break; + case 2: + strncpy(format->name, "HD-DV", sizeof format->name); + break; + default: + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming" + "interface %d: unknown DV format %u\n", + dev->udev->devnum, + alts->desc.bInterfaceNumber, buffer[8]); + return -EINVAL; + } + + strncat(format->name, buffer[8] & (1 << 7) ? " 60Hz" : " 50Hz", + sizeof format->name); + + format->fcc = V4L2_PIX_FMT_DV; + format->flags = UVC_FMT_FLAG_COMPRESSED | UVC_FMT_FLAG_STREAM; + format->bpp = 0; + ftype = 0; + + /* Create a dummy frame descriptor. */ + frame = &format->frame[0]; + memset(&format->frame[0], 0, sizeof format->frame[0]); + frame->bFrameIntervalType = 1; + frame->dwDefaultFrameInterval = 1; + frame->dwFrameInterval = *intervals; + *(*intervals)++ = 1; + format->nframes = 1; + break; + + case VS_FORMAT_MPEG2TS: + case VS_FORMAT_STREAM_BASED: + /* Not supported yet. */ + default: + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming" + "interface %d unsupported format %u\n", + dev->udev->devnum, alts->desc.bInterfaceNumber, + buffer[2]); + return -EINVAL; + } + + uvc_trace(UVC_TRACE_DESCR, "Found format %s.\n", format->name); + + buflen -= buffer[0]; + buffer += buffer[0]; + + /* Parse the frame descriptors. Only uncompressed, MJPEG and frame + * based formats have frame descriptors. + */ + while (buflen > 2 && buffer[2] == ftype) { + frame = &format->frame[format->nframes]; + + if (ftype != VS_FRAME_FRAME_BASED) + n = buflen > 25 ? buffer[25] : 0; + else + n = buflen > 21 ? buffer[21] : 0; + + n = n ? n : 3; + + if (buflen < 26 + 4*n) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming" + "interface %d FRAME error\n", dev->udev->devnum, + alts->desc.bInterfaceNumber); + return -EINVAL; + } + + frame->bFrameIndex = buffer[3]; + frame->bmCapabilities = buffer[4]; + frame->wWidth = le16_to_cpup((__le16 *)&buffer[5]); + frame->wHeight = le16_to_cpup((__le16 *)&buffer[7]); + frame->dwMinBitRate = le32_to_cpup((__le32 *)&buffer[9]); + frame->dwMaxBitRate = le32_to_cpup((__le32 *)&buffer[13]); + if (ftype != VS_FRAME_FRAME_BASED) { + frame->dwMaxVideoFrameBufferSize = + le32_to_cpup((__le32 *)&buffer[17]); + frame->dwDefaultFrameInterval = + le32_to_cpup((__le32 *)&buffer[21]); + frame->bFrameIntervalType = buffer[25]; + } else { + frame->dwMaxVideoFrameBufferSize = 0; + frame->dwDefaultFrameInterval = + le32_to_cpup((__le32 *)&buffer[17]); + frame->bFrameIntervalType = buffer[21]; + } + frame->dwFrameInterval = *intervals; + + /* Several UVC chipsets screw up dwMaxVideoFrameBufferSize + * completely. Observed behaviours range from setting the + * value to 1.1x the actual frame size of hardwiring the + * 16 low bits to 0. This results in a higher than necessary + * memory usage as well as a wrong image size information. For + * uncompressed formats this can be fixed by computing the + * value from the frame size. + */ + if (!(format->flags & UVC_FMT_FLAG_COMPRESSED)) + frame->dwMaxVideoFrameBufferSize = format->bpp + * frame->wWidth * frame->wHeight / 8; + + /* Some bogus devices report dwMinFrameInterval equal to + * dwMaxFrameInterval and have dwFrameIntervalStep set to + * zero. Setting all null intervals to 1 fixes the problem and + * some other divisions by zero which could happen. + */ + for (i = 0; i < n; ++i) { + interval = le32_to_cpup((__le32 *)&buffer[26+4*i]); + *(*intervals)++ = interval ? interval : 1; + } + + /* Make sure that the default frame interval stays between + * the boundaries. + */ + n -= frame->bFrameIntervalType ? 1 : 2; + frame->dwDefaultFrameInterval = + min(frame->dwFrameInterval[n], + max(frame->dwFrameInterval[0], + frame->dwDefaultFrameInterval)); + + uvc_trace(UVC_TRACE_DESCR, "- %ux%u (%u.%u fps)\n", + frame->wWidth, frame->wHeight, + 10000000/frame->dwDefaultFrameInterval, + (100000000/frame->dwDefaultFrameInterval)%10); + + format->nframes++; + buflen -= buffer[0]; + buffer += buffer[0]; + } + + if (buflen > 2 && buffer[2] == VS_STILL_IMAGE_FRAME) { + buflen -= buffer[0]; + buffer += buffer[0]; + } + + if (buflen > 2 && buffer[2] == VS_COLORFORMAT) { + if (buflen < 6) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming" + "interface %d COLORFORMAT error\n", + dev->udev->devnum, + alts->desc.bInterfaceNumber); + return -EINVAL; + } + + format->colorspace = uvc_colorspace(buffer[3]); + + buflen -= buffer[0]; + buffer += buffer[0]; + } + + return buffer - start; +} + +static int uvc_parse_streaming(struct uvc_device *dev, + struct usb_interface *intf) +{ + struct uvc_streaming *streaming = NULL; + struct uvc_format *format; + struct uvc_frame *frame; + struct usb_host_interface *alts = &intf->altsetting[0]; + unsigned char *_buffer, *buffer = alts->extra; + int _buflen, buflen = alts->extralen; + unsigned int nformats = 0, nframes = 0, nintervals = 0; + unsigned int size, i, n, p; + __u32 *interval; + __u16 psize; + int ret = -EINVAL; + + if (intf->cur_altsetting->desc.bInterfaceSubClass + != SC_VIDEOSTREAMING) { + uvc_trace(UVC_TRACE_DESCR, "device %d interface %d isn't a " + "video streaming interface\n", dev->udev->devnum, + intf->altsetting[0].desc.bInterfaceNumber); + return -EINVAL; + } + + if (usb_driver_claim_interface(&uvc_driver.driver, intf, dev)) { + uvc_trace(UVC_TRACE_DESCR, "device %d interface %d is already " + "claimed\n", dev->udev->devnum, + intf->altsetting[0].desc.bInterfaceNumber); + return -EINVAL; + } + + streaming = kzalloc(sizeof *streaming, GFP_KERNEL); + if (streaming == NULL) { + usb_driver_release_interface(&uvc_driver.driver, intf); + return -EINVAL; + } + + mutex_init(&streaming->mutex); + streaming->intf = usb_get_intf(intf); + streaming->intfnum = intf->cur_altsetting->desc.bInterfaceNumber; + + /* The Pico iMage webcam has its class-specific interface descriptors + * after the endpoint descriptors. + */ + if (buflen == 0) { + for (i = 0; i < alts->desc.bNumEndpoints; ++i) { + struct usb_host_endpoint *ep = &alts->endpoint[i]; + + if (ep->extralen == 0) + continue; + + if (ep->extralen > 2 && + ep->extra[1] == USB_DT_CS_INTERFACE) { + uvc_trace(UVC_TRACE_DESCR, "trying extra data " + "from endpoint %u.\n", i); + buffer = alts->endpoint[i].extra; + buflen = alts->endpoint[i].extralen; + break; + } + } + } + + /* Skip the standard interface descriptors. */ + while (buflen > 2 && buffer[1] != USB_DT_CS_INTERFACE) { + buflen -= buffer[0]; + buffer += buffer[0]; + } + + if (buflen <= 2) { + uvc_trace(UVC_TRACE_DESCR, "no class-specific streaming " + "interface descriptors found.\n"); + goto error; + } + + /* Parse the header descriptor. */ + if (buffer[2] == VS_OUTPUT_HEADER) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming interface " + "%d OUTPUT HEADER descriptor is not supported.\n", + dev->udev->devnum, alts->desc.bInterfaceNumber); + goto error; + } else if (buffer[2] == VS_INPUT_HEADER) { + p = buflen >= 5 ? buffer[3] : 0; + n = buflen >= 12 ? buffer[12] : 0; + + if (buflen < 13 + p*n || buffer[2] != VS_INPUT_HEADER) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming " + "interface %d INPUT HEADER descriptor is " + "invalid.\n", dev->udev->devnum, + alts->desc.bInterfaceNumber); + goto error; + } + + streaming->header.bNumFormats = p; + streaming->header.bEndpointAddress = buffer[6]; + streaming->header.bmInfo = buffer[7]; + streaming->header.bTerminalLink = buffer[8]; + streaming->header.bStillCaptureMethod = buffer[9]; + streaming->header.bTriggerSupport = buffer[10]; + streaming->header.bTriggerUsage = buffer[11]; + streaming->header.bControlSize = n; + + streaming->header.bmaControls = kmalloc(p*n, GFP_KERNEL); + if (streaming->header.bmaControls == NULL) { + ret = -ENOMEM; + goto error; + } + + memcpy(streaming->header.bmaControls, &buffer[13], p*n); + } else { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming interface " + "%d HEADER descriptor not found.\n", dev->udev->devnum, + alts->desc.bInterfaceNumber); + goto error; + } + + buflen -= buffer[0]; + buffer += buffer[0]; + + _buffer = buffer; + _buflen = buflen; + + /* Count the format and frame descriptors. */ + while (_buflen > 2) { + switch (_buffer[2]) { + case VS_FORMAT_UNCOMPRESSED: + case VS_FORMAT_MJPEG: + case VS_FORMAT_FRAME_BASED: + nformats++; + break; + + case VS_FORMAT_DV: + /* DV format has no frame descriptor. We will create a + * dummy frame descriptor with a dummy frame interval. + */ + nformats++; + nframes++; + nintervals++; + break; + + case VS_FORMAT_MPEG2TS: + case VS_FORMAT_STREAM_BASED: + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming " + "interface %d FORMAT %u is not supported.\n", + dev->udev->devnum, + alts->desc.bInterfaceNumber, _buffer[2]); + break; + + case VS_FRAME_UNCOMPRESSED: + case VS_FRAME_MJPEG: + nframes++; + if (_buflen > 25) + nintervals += _buffer[25] ? _buffer[25] : 3; + break; + + case VS_FRAME_FRAME_BASED: + nframes++; + if (_buflen > 21) + nintervals += _buffer[21] ? _buffer[21] : 3; + break; + } + + _buflen -= _buffer[0]; + _buffer += _buffer[0]; + } + + if (nformats == 0) { + uvc_trace(UVC_TRACE_DESCR, "device %d videostreaming interface " + "%d has no supported formats defined.\n", + dev->udev->devnum, alts->desc.bInterfaceNumber); + goto error; + } + + size = nformats * sizeof *format + nframes * sizeof *frame + + nintervals * sizeof *interval; + format = kzalloc(size, GFP_KERNEL); + if (format == NULL) { + ret = -ENOMEM; + goto error; + } + + frame = (struct uvc_frame *)&format[nformats]; + interval = (__u32 *)&frame[nframes]; + + streaming->format = format; + streaming->nformats = nformats; + + /* Parse the format descriptors. */ + while (buflen > 2) { + switch (buffer[2]) { + case VS_FORMAT_UNCOMPRESSED: + case VS_FORMAT_MJPEG: + case VS_FORMAT_DV: + case VS_FORMAT_FRAME_BASED: + format->frame = frame; + ret = uvc_parse_format(dev, streaming, format, + &interval, buffer, buflen); + if (ret < 0) + goto error; + + frame += format->nframes; + format++; + + buflen -= ret; + buffer += ret; + continue; + + default: + break; + } + + buflen -= buffer[0]; + buffer += buffer[0]; + } + + /* Parse the alternate settings to find the maximum bandwidth. */ + for (i = 0; i < intf->num_altsetting; ++i) { + struct usb_host_endpoint *ep; + alts = &intf->altsetting[i]; + ep = uvc_find_endpoint(alts, + streaming->header.bEndpointAddress); + if (ep == NULL) + continue; + + psize = le16_to_cpu(ep->desc.wMaxPacketSize); + psize = (psize & 0x07ff) * (1 + ((psize >> 11) & 3)); + if (psize > streaming->maxpsize) + streaming->maxpsize = psize; + } + + list_add_tail(&streaming->list, &dev->streaming); + return 0; + +error: + usb_driver_release_interface(&uvc_driver.driver, intf); + usb_put_intf(intf); + kfree(streaming->format); + kfree(streaming->header.bmaControls); + kfree(streaming); + return ret; +} + +/* Parse vendor-specific extensions. */ +static int uvc_parse_vendor_control(struct uvc_device *dev, + const unsigned char *buffer, int buflen) +{ + struct usb_device *udev = dev->udev; + struct usb_host_interface *alts = dev->intf->cur_altsetting; + struct uvc_entity *unit; + unsigned int n, p; + int handled = 0; + + switch (le16_to_cpu(dev->udev->descriptor.idVendor)) { + case 0x046d: /* Logitech */ + if (buffer[1] != 0x41 || buffer[2] != 0x01) + break; + + /* Logitech implements several vendor specific functions + * through vendor specific extension units (LXU). + * + * The LXU descriptors are similar to XU descriptors + * (see "USB Device Video Class for Video Devices", section + * 3.7.2.6 "Extension Unit Descriptor") with the following + * differences: + * + * ---------------------------------------------------------- + * 0 bLength 1 Number + * Size of this descriptor, in bytes: 24+p+n*2 + * ---------------------------------------------------------- + * 23+p+n bmControlsType N Bitmap + * Individual bits in the set are defined: + * 0: Absolute + * 1: Relative + * + * This bitset is mapped exactly the same as bmControls. + * ---------------------------------------------------------- + * 23+p+n*2 bReserved 1 Boolean + * ---------------------------------------------------------- + * 24+p+n*2 iExtension 1 Index + * Index of a string descriptor that describes this + * extension unit. + * ---------------------------------------------------------- + */ + p = buflen >= 22 ? buffer[21] : 0; + n = buflen >= 25 + p ? buffer[22+p] : 0; + + if (buflen < 25 + p + 2*n) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d EXTENSION_UNIT error\n", + udev->devnum, alts->desc.bInterfaceNumber); + break; + } + + unit = kzalloc(sizeof *unit + p + 2*n, GFP_KERNEL); + if (unit == NULL) + return -ENOMEM; + + unit->id = buffer[3]; + unit->type = VC_EXTENSION_UNIT; + memcpy(unit->extension.guidExtensionCode, &buffer[4], 16); + unit->extension.bNumControls = buffer[20]; + unit->extension.bNrInPins = + le16_to_cpup((__le16 *)&buffer[21]); + unit->extension.baSourceID = (__u8 *)unit + sizeof *unit; + memcpy(unit->extension.baSourceID, &buffer[22], p); + unit->extension.bControlSize = buffer[22+p]; + unit->extension.bmControls = (__u8 *)unit + sizeof *unit + p; + unit->extension.bmControlsType = (__u8 *)unit + sizeof *unit + + p + n; + memcpy(unit->extension.bmControls, &buffer[23+p], 2*n); + + if (buffer[24+p+2*n] != 0) + usb_string(udev, buffer[24+p+2*n], unit->name, + sizeof unit->name); + else + sprintf(unit->name, "Extension %u", buffer[3]); + + list_add_tail(&unit->list, &dev->entities); + handled = 1; + break; + } + + return handled; +} + +static int uvc_parse_standard_control(struct uvc_device *dev, + const unsigned char *buffer, int buflen) +{ + struct usb_device *udev = dev->udev; + struct uvc_entity *unit, *term; + struct usb_interface *intf; + struct usb_host_interface *alts = dev->intf->cur_altsetting; + unsigned int i, n, p, len; + __u16 type; + + switch (buffer[2]) { + case VC_HEADER: + n = buflen >= 12 ? buffer[11] : 0; + + if (buflen < 12 || buflen < 12 + n) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d HEADER error\n", udev->devnum, + alts->desc.bInterfaceNumber); + return -EINVAL; + } + + dev->uvc_version = le16_to_cpup((__le16 *)&buffer[3]); + dev->clock_frequency = le32_to_cpup((__le32 *)&buffer[7]); + + /* Parse all USB Video Streaming interfaces. */ + for (i = 0; i < n; ++i) { + intf = usb_ifnum_to_if(udev, buffer[12+i]); + if (intf == NULL) { + uvc_trace(UVC_TRACE_DESCR, "device %d " + "interface %d doesn't exists\n", + udev->devnum, i); + continue; + } + + uvc_parse_streaming(dev, intf); + } + break; + + case VC_INPUT_TERMINAL: + if (buflen < 8) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d INPUT_TERMINAL error\n", + udev->devnum, alts->desc.bInterfaceNumber); + return -EINVAL; + } + + /* Make sure the terminal type MSB is not null, otherwise it + * could be confused with a unit. + */ + type = le16_to_cpup((__le16 *)&buffer[4]); + if ((type & 0xff00) == 0) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d INPUT_TERMINAL %d has invalid " + "type 0x%04x, skipping\n", udev->devnum, + alts->desc.bInterfaceNumber, + buffer[3], type); + return 0; + } + + n = 0; + p = 0; + len = 8; + + if (type == ITT_CAMERA) { + n = buflen >= 15 ? buffer[14] : 0; + len = 15; + + } else if (type == ITT_MEDIA_TRANSPORT_INPUT) { + n = buflen >= 9 ? buffer[8] : 0; + p = buflen >= 10 + n ? buffer[9+n] : 0; + len = 10; + } + + if (buflen < len + n + p) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d INPUT_TERMINAL error\n", + udev->devnum, alts->desc.bInterfaceNumber); + return -EINVAL; + } + + term = kzalloc(sizeof *term + n + p, GFP_KERNEL); + if (term == NULL) + return -ENOMEM; + + term->id = buffer[3]; + term->type = type | UVC_TERM_INPUT; + + if (UVC_ENTITY_TYPE(term) == ITT_CAMERA) { + term->camera.bControlSize = n; + term->camera.bmControls = (__u8 *)term + sizeof *term; + term->camera.wObjectiveFocalLengthMin = + le16_to_cpup((__le16 *)&buffer[8]); + term->camera.wObjectiveFocalLengthMax = + le16_to_cpup((__le16 *)&buffer[10]); + term->camera.wOcularFocalLength = + le16_to_cpup((__le16 *)&buffer[12]); + memcpy(term->camera.bmControls, &buffer[15], n); + } else if (UVC_ENTITY_TYPE(term) == ITT_MEDIA_TRANSPORT_INPUT) { + term->media.bControlSize = n; + term->media.bmControls = (__u8 *)term + sizeof *term; + term->media.bTransportModeSize = p; + term->media.bmTransportModes = (__u8 *)term + + sizeof *term + n; + memcpy(term->media.bmControls, &buffer[9], n); + memcpy(term->media.bmTransportModes, &buffer[10+n], p); + } + + if (buffer[7] != 0) + usb_string(udev, buffer[7], term->name, + sizeof term->name); + else if (UVC_ENTITY_TYPE(term) == ITT_CAMERA) + sprintf(term->name, "Camera %u", buffer[3]); + else if (UVC_ENTITY_TYPE(term) == ITT_MEDIA_TRANSPORT_INPUT) + sprintf(term->name, "Media %u", buffer[3]); + else + sprintf(term->name, "Input %u", buffer[3]); + + list_add_tail(&term->list, &dev->entities); + break; + + case VC_OUTPUT_TERMINAL: + if (buflen < 9) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d OUTPUT_TERMINAL error\n", + udev->devnum, alts->desc.bInterfaceNumber); + return -EINVAL; + } + + /* Make sure the terminal type MSB is not null, otherwise it + * could be confused with a unit. + */ + type = le16_to_cpup((__le16 *)&buffer[4]); + if ((type & 0xff00) == 0) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d OUTPUT_TERMINAL %d has invalid " + "type 0x%04x, skipping\n", udev->devnum, + alts->desc.bInterfaceNumber, buffer[3], type); + return 0; + } + + term = kzalloc(sizeof *term, GFP_KERNEL); + if (term == NULL) + return -ENOMEM; + + term->id = buffer[3]; + term->type = type | UVC_TERM_OUTPUT; + term->output.bSourceID = buffer[7]; + + if (buffer[8] != 0) + usb_string(udev, buffer[8], term->name, + sizeof term->name); + else + sprintf(term->name, "Output %u", buffer[3]); + + list_add_tail(&term->list, &dev->entities); + break; + + case VC_SELECTOR_UNIT: + p = buflen >= 5 ? buffer[4] : 0; + + if (buflen < 5 || buflen < 6 + p) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d SELECTOR_UNIT error\n", + udev->devnum, alts->desc.bInterfaceNumber); + return -EINVAL; + } + + unit = kzalloc(sizeof *unit + p, GFP_KERNEL); + if (unit == NULL) + return -ENOMEM; + + unit->id = buffer[3]; + unit->type = buffer[2]; + unit->selector.bNrInPins = buffer[4]; + unit->selector.baSourceID = (__u8 *)unit + sizeof *unit; + memcpy(unit->selector.baSourceID, &buffer[5], p); + + if (buffer[5+p] != 0) + usb_string(udev, buffer[5+p], unit->name, + sizeof unit->name); + else + sprintf(unit->name, "Selector %u", buffer[3]); + + list_add_tail(&unit->list, &dev->entities); + break; + + case VC_PROCESSING_UNIT: + n = buflen >= 8 ? buffer[7] : 0; + p = dev->uvc_version >= 0x0110 ? 10 : 9; + + if (buflen < p + n) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d PROCESSING_UNIT error\n", + udev->devnum, alts->desc.bInterfaceNumber); + return -EINVAL; + } + + unit = kzalloc(sizeof *unit + n, GFP_KERNEL); + if (unit == NULL) + return -ENOMEM; + + unit->id = buffer[3]; + unit->type = buffer[2]; + unit->processing.bSourceID = buffer[4]; + unit->processing.wMaxMultiplier = + le16_to_cpup((__le16 *)&buffer[5]); + unit->processing.bControlSize = buffer[7]; + unit->processing.bmControls = (__u8 *)unit + sizeof *unit; + memcpy(unit->processing.bmControls, &buffer[8], n); + if (dev->uvc_version >= 0x0110) + unit->processing.bmVideoStandards = buffer[9+n]; + + if (buffer[8+n] != 0) + usb_string(udev, buffer[8+n], unit->name, + sizeof unit->name); + else + sprintf(unit->name, "Processing %u", buffer[3]); + + list_add_tail(&unit->list, &dev->entities); + break; + + case VC_EXTENSION_UNIT: + p = buflen >= 22 ? buffer[21] : 0; + n = buflen >= 24 + p ? buffer[22+p] : 0; + + if (buflen < 24 + p + n) { + uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " + "interface %d EXTENSION_UNIT error\n", + udev->devnum, alts->desc.bInterfaceNumber); + return -EINVAL; + } + + unit = kzalloc(sizeof *unit + p + n, GFP_KERNEL); + if (unit == NULL) + return -ENOMEM; + + unit->id = buffer[3]; + unit->type = buffer[2]; + memcpy(unit->extension.guidExtensionCode, &buffer[4], 16); + unit->extension.bNumControls = buffer[20]; + unit->extension.bNrInPins = + le16_to_cpup((__le16 *)&buffer[21]); + unit->extension.baSourceID = (__u8 *)unit + sizeof *unit; + memcpy(unit->extension.baSourceID, &buffer[22], p); + unit->extension.bControlSize = buffer[22+p]; + unit->extension.bmControls = (__u8 *)unit + sizeof *unit + p; + memcpy(unit->extension.bmControls, &buffer[23+p], n); + + if (buffer[23+p+n] != 0) + usb_string(udev, buffer[23+p+n], unit->name, + sizeof unit->name); + else + sprintf(unit->name, "Extension %u", buffer[3]); + + list_add_tail(&unit->list, &dev->entities); + break; + + default: + uvc_trace(UVC_TRACE_DESCR, "Found an unknown CS_INTERFACE " + "descriptor (%u)\n", buffer[2]); + break; + } + + return 0; +} + +static int uvc_parse_control(struct uvc_device *dev) +{ + struct usb_host_interface *alts = dev->intf->cur_altsetting; + unsigned char *buffer = alts->extra; + int buflen = alts->extralen; + int ret; + + /* Parse the default alternate setting only, as the UVC specification + * defines a single alternate setting, the default alternate setting + * zero. + */ + + while (buflen > 2) { + if (uvc_parse_vendor_control(dev, buffer, buflen) || + buffer[1] != USB_DT_CS_INTERFACE) + goto next_descriptor; + + if ((ret = uvc_parse_standard_control(dev, buffer, buflen)) < 0) + return ret; + +next_descriptor: + buflen -= buffer[0]; + buffer += buffer[0]; + } + + /* Check if the optional status endpoint is present. */ + if (alts->desc.bNumEndpoints == 1) { + struct usb_host_endpoint *ep = &alts->endpoint[0]; + struct usb_endpoint_descriptor *desc = &ep->desc; + + if (usb_endpoint_is_int_in(desc) && + le16_to_cpu(desc->wMaxPacketSize) >= 8 && + desc->bInterval != 0) { + uvc_trace(UVC_TRACE_DESCR, "Found a Status endpoint " + "(addr %02x).\n", desc->bEndpointAddress); + dev->int_ep = ep; + } + } + + return 0; +} + +/* ------------------------------------------------------------------------ + * USB probe and disconnect + */ + +/* + * Unregister the video devices. + */ +static void uvc_unregister_video(struct uvc_device *dev) +{ + if (dev->video.vdev) { + if (dev->video.vdev->minor == -1) + video_device_release(dev->video.vdev); + else + video_unregister_device(dev->video.vdev); + dev->video.vdev = NULL; + } +} + +/* + * Scan the UVC descriptors to locate a chain starting at an Output Terminal + * and containing the following units: + * + * - a USB Streaming Output Terminal + * - zero or one Processing Unit + * - zero, one or mode single-input Selector Units + * - zero or one multiple-input Selector Units, provided all inputs are + * connected to input terminals + * - zero, one or mode single-input Extension Units + * - one Camera Input Terminal, or one or more External terminals. + * + * A side forward scan is made on each detected entity to check for additional + * extension units. + */ +static int uvc_scan_chain_entity(struct uvc_video_device *video, + struct uvc_entity *entity) +{ + switch (UVC_ENTITY_TYPE(entity)) { + case VC_EXTENSION_UNIT: + if (uvc_trace_param & UVC_TRACE_PROBE) + printk(" <- XU %d", entity->id); + + if (entity->extension.bNrInPins != 1) { + uvc_trace(UVC_TRACE_DESCR, "Extension unit %d has more " + "than 1 input pin.\n", entity->id); + return -1; + } + + list_add_tail(&entity->chain, &video->extensions); + break; + + case VC_PROCESSING_UNIT: + if (uvc_trace_param & UVC_TRACE_PROBE) + printk(" <- PU %d", entity->id); + + if (video->processing != NULL) { + uvc_trace(UVC_TRACE_DESCR, "Found multiple " + "Processing Units in chain.\n"); + return -1; + } + + video->processing = entity; + break; + + case VC_SELECTOR_UNIT: + if (uvc_trace_param & UVC_TRACE_PROBE) + printk(" <- SU %d", entity->id); + + /* Single-input selector units are ignored. */ + if (entity->selector.bNrInPins == 1) + break; + + if (video->selector != NULL) { + uvc_trace(UVC_TRACE_DESCR, "Found multiple Selector " + "Units in chain.\n"); + return -1; + } + + video->selector = entity; + break; + + case ITT_VENDOR_SPECIFIC: + case ITT_CAMERA: + case ITT_MEDIA_TRANSPORT_INPUT: + if (uvc_trace_param & UVC_TRACE_PROBE) + printk(" <- IT %d\n", entity->id); + + list_add_tail(&entity->chain, &video->iterms); + break; + + default: + uvc_trace(UVC_TRACE_DESCR, "Unsupported entity type " + "0x%04x found in chain.\n", UVC_ENTITY_TYPE(entity)); + return -1; + } + + return 0; +} + +static int uvc_scan_chain_forward(struct uvc_video_device *video, + struct uvc_entity *entity, struct uvc_entity *prev) +{ + struct uvc_entity *forward; + int found; + + /* Forward scan */ + forward = NULL; + found = 0; + + while (1) { + forward = uvc_entity_by_reference(video->dev, entity->id, + forward); + if (forward == NULL) + break; + + if (UVC_ENTITY_TYPE(forward) != VC_EXTENSION_UNIT || + forward == prev) + continue; + + if (forward->extension.bNrInPins != 1) { + uvc_trace(UVC_TRACE_DESCR, "Extension unit %d has" + "more than 1 input pin.\n", entity->id); + return -1; + } + + list_add_tail(&forward->chain, &video->extensions); + if (uvc_trace_param & UVC_TRACE_PROBE) { + if (!found) + printk(" (-> XU"); + + printk(" %d", forward->id); + found = 1; + } + } + if (found) + printk(")"); + + return 0; +} + +static int uvc_scan_chain_backward(struct uvc_video_device *video, + struct uvc_entity *entity) +{ + struct uvc_entity *term; + int id = -1, i; + + switch (UVC_ENTITY_TYPE(entity)) { + case VC_EXTENSION_UNIT: + id = entity->extension.baSourceID[0]; + break; + + case VC_PROCESSING_UNIT: + id = entity->processing.bSourceID; + break; + + case VC_SELECTOR_UNIT: + /* Single-input selector units are ignored. */ + if (entity->selector.bNrInPins == 1) { + id = entity->selector.baSourceID[0]; + break; + } + + if (uvc_trace_param & UVC_TRACE_PROBE) + printk(" <- IT"); + + video->selector = entity; + for (i = 0; i < entity->selector.bNrInPins; ++i) { + id = entity->selector.baSourceID[i]; + term = uvc_entity_by_id(video->dev, id); + if (term == NULL || !UVC_ENTITY_IS_ITERM(term)) { + uvc_trace(UVC_TRACE_DESCR, "Selector unit %d " + "input %d isn't connected to an " + "input terminal\n", entity->id, i); + return -1; + } + + if (uvc_trace_param & UVC_TRACE_PROBE) + printk(" %d", term->id); + + list_add_tail(&term->chain, &video->iterms); + uvc_scan_chain_forward(video, term, entity); + } + + if (uvc_trace_param & UVC_TRACE_PROBE) + printk("\n"); + + id = 0; + break; + } + + return id; +} + +static int uvc_scan_chain(struct uvc_video_device *video) +{ + struct uvc_entity *entity, *prev; + int id; + + entity = video->oterm; + uvc_trace(UVC_TRACE_PROBE, "Scanning UVC chain: OT %d", entity->id); + id = entity->output.bSourceID; + while (id != 0) { + prev = entity; + entity = uvc_entity_by_id(video->dev, id); + if (entity == NULL) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "unknown entity %d.\n", id); + return -1; + } + + /* Process entity */ + if (uvc_scan_chain_entity(video, entity) < 0) + return -1; + + /* Forward scan */ + if (uvc_scan_chain_forward(video, entity, prev) < 0) + return -1; + + /* Stop when a terminal is found. */ + if (!UVC_ENTITY_IS_UNIT(entity)) + break; + + /* Backward scan */ + id = uvc_scan_chain_backward(video, entity); + if (id < 0) + return id; + } + + /* Initialize the video buffers queue. */ + uvc_queue_init(&video->queue); + + return 0; +} + +/* + * Register the video devices. + * + * The driver currently supports a single video device per control interface + * only. The terminal and units must match the following structure: + * + * ITT_CAMERA -> VC_PROCESSING_UNIT -> VC_EXTENSION_UNIT{0,n} -> TT_STREAMING + * + * The Extension Units, if present, must have a single input pin. The + * Processing Unit and Extension Units can be in any order. Additional + * Extension Units connected to the main chain as single-unit branches are + * also supported. + */ +static int uvc_register_video(struct uvc_device *dev) +{ + struct video_device *vdev; + struct uvc_entity *term; + int found = 0, ret; + + /* Check if the control interface matches the structure we expect. */ + list_for_each_entry(term, &dev->entities, list) { + struct uvc_streaming *streaming; + + if (UVC_ENTITY_TYPE(term) != TT_STREAMING) + continue; + + memset(&dev->video, 0, sizeof dev->video); + mutex_init(&dev->video.ctrl_mutex); + INIT_LIST_HEAD(&dev->video.iterms); + INIT_LIST_HEAD(&dev->video.extensions); + dev->video.oterm = term; + dev->video.dev = dev; + if (uvc_scan_chain(&dev->video) < 0) + continue; + + list_for_each_entry(streaming, &dev->streaming, list) { + if (streaming->header.bTerminalLink == term->id) { + dev->video.streaming = streaming; + found = 1; + break; + } + } + + if (found) + break; + } + + if (!found) { + uvc_printk(KERN_INFO, "No valid video chain found.\n"); + return -1; + } + + if (uvc_trace_param & UVC_TRACE_PROBE) { + uvc_printk(KERN_INFO, "Found a valid video chain ("); + list_for_each_entry(term, &dev->video.iterms, chain) { + printk("%d", term->id); + if (term->chain.next != &dev->video.iterms) + printk(","); + } + printk(" -> %d).\n", dev->video.oterm->id); + } + + /* Initialize the streaming interface with default streaming + * parameters. + */ + if ((ret = uvc_video_init(&dev->video)) < 0) { + uvc_printk(KERN_ERR, "Failed to initialize the device " + "(%d).\n", ret); + return ret; + } + + /* Register the device with V4L. */ + vdev = video_device_alloc(); + if (vdev == NULL) + return -1; + + /* We already hold a reference to dev->udev. The video device will be + * unregistered before the reference is released, so we don't need to + * get another one. + */ + vdev->dev = &dev->intf->dev; + vdev->type = 0; + vdev->type2 = 0; + vdev->minor = -1; + vdev->fops = &uvc_fops; + vdev->release = video_device_release; + strncpy(vdev->name, dev->name, sizeof vdev->name); + + /* Set the driver data before calling video_register_device, otherwise + * uvc_v4l2_open might race us. + * + * FIXME: usb_set_intfdata hasn't been called so far. Is that a + * problem ? Does any function which could be called here get + * a pointer to the usb_interface ? + */ + dev->video.vdev = vdev; + video_set_drvdata(vdev, &dev->video); + + if (video_register_device(vdev, VFL_TYPE_GRABBER, -1) < 0) { + dev->video.vdev = NULL; + video_device_release(vdev); + return -1; + } + + return 0; +} + +/* + * Delete the UVC device. + * + * Called by the kernel when the last reference to the uvc_device structure + * is released. + * + * Unregistering the video devices is done here because every opened instance + * must be closed before the device can be unregistered. An alternative would + * have been to use another reference count for uvc_v4l2_open/uvc_release, and + * unregister the video devices on disconnect when that reference count drops + * to zero. + * + * As this function is called after or during disconnect(), all URBs have + * already been canceled by the USB core. There is no need to kill the + * interrupt URB manually. + */ +void uvc_delete(struct kref *kref) +{ + struct uvc_device *dev = container_of(kref, struct uvc_device, kref); + struct list_head *p, *n; + + /* Unregister the video device */ + uvc_unregister_video(dev); + usb_put_intf(dev->intf); + usb_put_dev(dev->udev); + + uvc_status_cleanup(dev); + uvc_ctrl_cleanup_device(dev); + + list_for_each_safe(p, n, &dev->entities) { + struct uvc_entity *entity; + entity = list_entry(p, struct uvc_entity, list); + kfree(entity); + } + + list_for_each_safe(p, n, &dev->streaming) { + struct uvc_streaming *streaming; + streaming = list_entry(p, struct uvc_streaming, list); + usb_driver_release_interface(&uvc_driver.driver, + streaming->intf); + usb_put_intf(streaming->intf); + kfree(streaming->format); + kfree(streaming->header.bmaControls); + kfree(streaming); + } + + kfree(dev); +} + +static int uvc_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_device *udev = interface_to_usbdev(intf); + struct uvc_device *dev; + int ret; + + if (id->idVendor && id->idProduct) + uvc_trace(UVC_TRACE_PROBE, "Probing known UVC device %s " + "(%04x:%04x)\n", udev->devpath, id->idVendor, + id->idProduct); + else + uvc_trace(UVC_TRACE_PROBE, "Probing generic UVC device %s\n", + udev->devpath); + + /* Allocate memory for the device and initialize it */ + if ((dev = kzalloc(sizeof *dev, GFP_KERNEL)) == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&dev->entities); + INIT_LIST_HEAD(&dev->streaming); + kref_init(&dev->kref); + + dev->udev = usb_get_dev(udev); + dev->intf = usb_get_intf(intf); + dev->intfnum = intf->cur_altsetting->desc.bInterfaceNumber; + dev->quirks = id->driver_info | uvc_quirks_param; + + if (udev->product != NULL) + strncpy(dev->name, udev->product, sizeof dev->name); + else + snprintf(dev->name, sizeof dev->name, + "UVC Camera (%04x:%04x)", + le16_to_cpu(udev->descriptor.idVendor), + le16_to_cpu(udev->descriptor.idProduct)); + + /* Parse the Video Class control descriptor */ + if (uvc_parse_control(dev) < 0) { + uvc_trace(UVC_TRACE_PROBE, "Unable to parse UVC " + "descriptors.\n"); + goto error; + } + + uvc_printk(KERN_INFO, "Found UVC %u.%02u device %s (%04x:%04x)\n", + dev->uvc_version >> 8, dev->uvc_version & 0xff, + udev->product ? udev->product : "<unnamed>", + le16_to_cpu(udev->descriptor.idVendor), + le16_to_cpu(udev->descriptor.idProduct)); + + if (uvc_quirks_param != 0) { + uvc_printk(KERN_INFO, "Forcing device quirks 0x%x by module " + "parameter for testing purpose.\n", uvc_quirks_param); + uvc_printk(KERN_INFO, "Please report required quirks to the " + "linux-uvc-devel mailing list.\n"); + } + + /* Initialize controls */ + if (uvc_ctrl_init_device(dev) < 0) + goto error; + + /* Register the video devices */ + if (uvc_register_video(dev) < 0) + goto error; + + /* Save our data pointer in the interface data */ + usb_set_intfdata(intf, dev); + + /* Initialize the interrupt URB */ + if ((ret = uvc_status_init(dev)) < 0) { + uvc_printk(KERN_INFO, "Unable to initialize the status " + "endpoint (%d), status interrupt will not be " + "supported.\n", ret); + } + + uvc_trace(UVC_TRACE_PROBE, "UVC device initialized.\n"); + return 0; + +error: + kref_put(&dev->kref, uvc_delete); + return -ENODEV; +} + +static void uvc_disconnect(struct usb_interface *intf) +{ + struct uvc_device *dev = usb_get_intfdata(intf); + + /* Set the USB interface data to NULL. This can be done outside the + * lock, as there's no other reader. + */ + usb_set_intfdata(intf, NULL); + + if (intf->cur_altsetting->desc.bInterfaceSubClass == SC_VIDEOSTREAMING) + return; + + /* uvc_v4l2_open() might race uvc_disconnect(). A static driver-wide + * lock is needed to prevent uvc_disconnect from releasing its + * reference to the uvc_device instance after uvc_v4l2_open() received + * the pointer to the device (video_devdata) but before it got the + * chance to increase the reference count (kref_get). + */ + mutex_lock(&uvc_driver.open_mutex); + + dev->state |= UVC_DEV_DISCONNECTED; + kref_put(&dev->kref, uvc_delete); + + mutex_unlock(&uvc_driver.open_mutex); +} + +static int uvc_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct uvc_device *dev = usb_get_intfdata(intf); + + uvc_trace(UVC_TRACE_SUSPEND, "Suspending interface %u\n", + intf->cur_altsetting->desc.bInterfaceNumber); + + /* Controls are cached on the fly so they don't need to be saved. */ + if (intf->cur_altsetting->desc.bInterfaceSubClass == SC_VIDEOCONTROL) + return uvc_status_suspend(dev); + + if (dev->video.streaming->intf != intf) { + uvc_trace(UVC_TRACE_SUSPEND, "Suspend: video streaming USB " + "interface mismatch.\n"); + return -EINVAL; + } + + return uvc_video_suspend(&dev->video); +} + +static int uvc_resume(struct usb_interface *intf) +{ + struct uvc_device *dev = usb_get_intfdata(intf); + int ret; + + uvc_trace(UVC_TRACE_SUSPEND, "Resuming interface %u\n", + intf->cur_altsetting->desc.bInterfaceNumber); + + if (intf->cur_altsetting->desc.bInterfaceSubClass == SC_VIDEOCONTROL) { + if ((ret = uvc_ctrl_resume_device(dev)) < 0) + return ret; + + return uvc_status_resume(dev); + } + + if (dev->video.streaming->intf != intf) { + uvc_trace(UVC_TRACE_SUSPEND, "Resume: video streaming USB " + "interface mismatch.\n"); + return -EINVAL; + } + + return uvc_video_resume(&dev->video); +} + +/* ------------------------------------------------------------------------ + * Driver initialization and cleanup + */ + +/* + * The Logitech cameras listed below have their interface class set to + * VENDOR_SPEC because they don't announce themselves as UVC devices, even + * though they are compliant. + */ +static struct usb_device_id uvc_ids[] = { + /* ALi M5606 (Clevo M540SR) */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x0402, + .idProduct = 0x5606, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Creative Live! Optia */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x041e, + .idProduct = 0x4057, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Microsoft Lifecam NX-6000 */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x045e, + .idProduct = 0x00f8, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Microsoft Lifecam VX-7000 */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x045e, + .idProduct = 0x0723, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Logitech Quickcam Fusion */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x046d, + .idProduct = 0x08c1, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0 }, + /* Logitech Quickcam Orbit MP */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x046d, + .idProduct = 0x08c2, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0 }, + /* Logitech Quickcam Pro for Notebook */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x046d, + .idProduct = 0x08c3, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0 }, + /* Logitech Quickcam Pro 5000 */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x046d, + .idProduct = 0x08c5, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0 }, + /* Logitech Quickcam OEM Dell Notebook */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x046d, + .idProduct = 0x08c6, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0 }, + /* Logitech Quickcam OEM Cisco VT Camera II */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x046d, + .idProduct = 0x08c7, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0 }, + /* Apple Built-In iSight */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x05ac, + .idProduct = 0x8501, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX + | UVC_QUIRK_BUILTIN_ISIGHT }, + /* Genesys Logic USB 2.0 PC Camera */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x05e3, + .idProduct = 0x0505, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_STREAM_NO_FID }, + /* Silicon Motion SM371 */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x090c, + .idProduct = 0xb371, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* MT6227 */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x0e8d, + .idProduct = 0x0004, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Syntek (HP Spartan) */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x174f, + .idProduct = 0x5212, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_STREAM_NO_FID }, + /* Syntek (Asus U3S) */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x174f, + .idProduct = 0x8a33, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_STREAM_NO_FID }, + /* Ecamm Pico iMage */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x18cd, + .idProduct = 0xcafe, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_EXTRAFIELDS }, + /* Bodelin ProScopeHR */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_DEV_HI + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x19ab, + .idProduct = 0x1000, + .bcdDevice_hi = 0x0126, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_STATUS_INTERVAL }, + /* SiGma Micro USB Web Camera */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x1c4f, + .idProduct = 0x3000, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX + | UVC_QUIRK_IGNORE_SELECTOR_UNIT}, + /* Acer OEM Webcam - Unknown vendor */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x5986, + .idProduct = 0x0100, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Packard Bell OEM Webcam */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x5986, + .idProduct = 0x0101, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Acer Crystal Eye webcam */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x5986, + .idProduct = 0x0102, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Acer OrbiCam - Unknown vendor */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x5986, + .idProduct = 0x0200, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, + /* Generic USB Video Class */ + { USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, 0) }, + {} +}; + +MODULE_DEVICE_TABLE(usb, uvc_ids); + +struct uvc_driver uvc_driver = { + .driver = { + .name = "uvcvideo", + .probe = uvc_probe, + .disconnect = uvc_disconnect, + .suspend = uvc_suspend, + .resume = uvc_resume, + .id_table = uvc_ids, + .supports_autosuspend = 1, + }, +}; + +static int __init uvc_init(void) +{ + int result; + + INIT_LIST_HEAD(&uvc_driver.devices); + INIT_LIST_HEAD(&uvc_driver.controls); + mutex_init(&uvc_driver.open_mutex); + mutex_init(&uvc_driver.ctrl_mutex); + + uvc_ctrl_init(); + + result = usb_register(&uvc_driver.driver); + if (result == 0) + printk(KERN_INFO DRIVER_DESC " (" DRIVER_VERSION ")\n"); + return result; +} + +static void __exit uvc_cleanup(void) +{ + usb_deregister(&uvc_driver.driver); +} + +module_init(uvc_init); +module_exit(uvc_cleanup); + +module_param_named(quirks, uvc_quirks_param, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(quirks, "Forced device quirks"); +module_param_named(trace, uvc_trace_param, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(trace, "Trace level bitmask"); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/media/video/uvc/uvc_isight.c b/drivers/media/video/uvc/uvc_isight.c new file mode 100644 index 000000000000..37bdefdbead5 --- /dev/null +++ b/drivers/media/video/uvc/uvc_isight.c @@ -0,0 +1,134 @@ +/* + * uvc_isight.c -- USB Video Class driver - iSight support + * + * Copyright (C) 2006-2007 + * Ivan N. Zlatev <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/usb.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +#include "uvcvideo.h" + +/* Built-in iSight webcams implements most of UVC 1.0 except a + * different packet format. Instead of sending a header at the + * beginning of each isochronous transfer payload, the webcam sends a + * single header per image (on its own in a packet), followed by + * packets containing data only. + * + * Offset Size (bytes) Description + * ------------------------------------------------------------------ + * 0x00 1 Header length + * 0x01 1 Flags (UVC-compliant) + * 0x02 4 Always equal to '11223344' + * 0x06 8 Always equal to 'deadbeefdeadface' + * 0x0e 16 Unknown + * + * The header can be prefixed by an optional, unknown-purpose byte. + */ + +static int isight_decode(struct uvc_video_queue *queue, struct uvc_buffer *buf, + const __u8 *data, unsigned int len) +{ + static const __u8 hdr[] = { + 0x11, 0x22, 0x33, 0x44, + 0xde, 0xad, 0xbe, 0xef, + 0xde, 0xad, 0xfa, 0xce + }; + + unsigned int maxlen, nbytes; + __u8 *mem; + int is_header = 0; + + if (buf == NULL) + return 0; + + if ((len >= 14 && memcmp(&data[2], hdr, 12) == 0) || + (len >= 15 && memcmp(&data[3], hdr, 12) == 0)) { + uvc_trace(UVC_TRACE_FRAME, "iSight header found\n"); + is_header = 1; + } + + /* Synchronize to the input stream by waiting for a header packet. */ + if (buf->state != UVC_BUF_STATE_ACTIVE) { + if (!is_header) { + uvc_trace(UVC_TRACE_FRAME, "Dropping packet (out of " + "sync).\n"); + return 0; + } + + buf->state = UVC_BUF_STATE_ACTIVE; + } + + /* Mark the buffer as done if we're at the beginning of a new frame. + * + * Empty buffers (bytesused == 0) don't trigger end of frame detection + * as it doesn't make sense to return an empty buffer. + */ + if (is_header && buf->buf.bytesused != 0) { + buf->state = UVC_BUF_STATE_DONE; + return -EAGAIN; + } + + /* Copy the video data to the buffer. Skip header packets, as they + * contain no data. + */ + if (!is_header) { + maxlen = buf->buf.length - buf->buf.bytesused; + mem = queue->mem + buf->buf.m.offset + buf->buf.bytesused; + nbytes = min(len, maxlen); + memcpy(mem, data, nbytes); + buf->buf.bytesused += nbytes; + + if (len > maxlen || buf->buf.bytesused == buf->buf.length) { + uvc_trace(UVC_TRACE_FRAME, "Frame complete " + "(overflow).\n"); + buf->state = UVC_BUF_STATE_DONE; + } + } + + return 0; +} + +void uvc_video_decode_isight(struct urb *urb, struct uvc_video_device *video, + struct uvc_buffer *buf) +{ + int ret, i; + + for (i = 0; i < urb->number_of_packets; ++i) { + if (urb->iso_frame_desc[i].status < 0) { + uvc_trace(UVC_TRACE_FRAME, "USB isochronous frame " + "lost (%d).\n", + urb->iso_frame_desc[i].status); + } + + /* Decode the payload packet. + * uvc_video_decode is entered twice when a frame transition + * has been detected because the end of frame can only be + * reliably detected when the first packet of the new frame + * is processed. The first pass detects the transition and + * closes the previous frame's buffer, the second pass + * processes the data of the first payload of the new frame. + */ + do { + ret = isight_decode(&video->queue, buf, + urb->transfer_buffer + + urb->iso_frame_desc[i].offset, + urb->iso_frame_desc[i].actual_length); + + if (buf == NULL) + break; + + if (buf->state == UVC_BUF_STATE_DONE || + buf->state == UVC_BUF_STATE_ERROR) + buf = uvc_queue_next_buffer(&video->queue, buf); + } while (ret == -EAGAIN); + } +} diff --git a/drivers/media/video/uvc/uvc_queue.c b/drivers/media/video/uvc/uvc_queue.c new file mode 100644 index 000000000000..0923f0e3b3d4 --- /dev/null +++ b/drivers/media/video/uvc/uvc_queue.c @@ -0,0 +1,477 @@ +/* + * uvc_queue.c -- USB Video Class driver - Buffers management + * + * Copyright (C) 2005-2008 + * Laurent Pinchart ([email protected]) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/usb.h> +#include <linux/videodev2.h> +#include <linux/vmalloc.h> +#include <linux/wait.h> +#include <asm/atomic.h> + +#include "uvcvideo.h" + +/* ------------------------------------------------------------------------ + * Video buffers queue management. + * + * Video queues is initialized by uvc_queue_init(). The function performs + * basic initialization of the uvc_video_queue struct and never fails. + * + * Video buffer allocation and freeing are performed by uvc_alloc_buffers and + * uvc_free_buffers respectively. The former acquires the video queue lock, + * while the later must be called with the lock held (so that allocation can + * free previously allocated buffers). Trying to free buffers that are mapped + * to user space will return -EBUSY. + * + * Video buffers are managed using two queues. However, unlike most USB video + * drivers which use an in queue and an out queue, we use a main queue which + * holds all queued buffers (both 'empty' and 'done' buffers), and an irq + * queue which holds empty buffers. This design (copied from video-buf) + * minimizes locking in interrupt, as only one queue is shared between + * interrupt and user contexts. + * + * Use cases + * --------- + * + * Unless stated otherwise, all operations which modify the irq buffers queue + * are protected by the irq spinlock. + * + * 1. The user queues the buffers, starts streaming and dequeues a buffer. + * + * The buffers are added to the main and irq queues. Both operations are + * protected by the queue lock, and the latert is protected by the irq + * spinlock as well. + * + * The completion handler fetches a buffer from the irq queue and fills it + * with video data. If no buffer is available (irq queue empty), the handler + * returns immediately. + * + * When the buffer is full, the completion handler removes it from the irq + * queue, marks it as ready (UVC_BUF_STATE_DONE) and wake its wait queue. + * At that point, any process waiting on the buffer will be woken up. If a + * process tries to dequeue a buffer after it has been marked ready, the + * dequeing will succeed immediately. + * + * 2. Buffers are queued, user is waiting on a buffer and the device gets + * disconnected. + * + * When the device is disconnected, the kernel calls the completion handler + * with an appropriate status code. The handler marks all buffers in the + * irq queue as being erroneous (UVC_BUF_STATE_ERROR) and wakes them up so + * that any process waiting on a buffer gets woken up. + * + * Waking up up the first buffer on the irq list is not enough, as the + * process waiting on the buffer might restart the dequeue operation + * immediately. + * + */ + +void uvc_queue_init(struct uvc_video_queue *queue) +{ + mutex_init(&queue->mutex); + spin_lock_init(&queue->irqlock); + INIT_LIST_HEAD(&queue->mainqueue); + INIT_LIST_HEAD(&queue->irqqueue); +} + +/* + * Allocate the video buffers. + * + * Pages are reserved to make sure they will not be swaped, as they will be + * filled in URB completion handler. + * + * Buffers will be individually mapped, so they must all be page aligned. + */ +int uvc_alloc_buffers(struct uvc_video_queue *queue, unsigned int nbuffers, + unsigned int buflength) +{ + unsigned int bufsize = PAGE_ALIGN(buflength); + unsigned int i; + void *mem = NULL; + int ret; + + if (nbuffers > UVC_MAX_VIDEO_BUFFERS) + nbuffers = UVC_MAX_VIDEO_BUFFERS; + + mutex_lock(&queue->mutex); + + if ((ret = uvc_free_buffers(queue)) < 0) + goto done; + + /* Bail out if no buffers should be allocated. */ + if (nbuffers == 0) + goto done; + + /* Decrement the number of buffers until allocation succeeds. */ + for (; nbuffers > 0; --nbuffers) { + mem = vmalloc_32(nbuffers * bufsize); + if (mem != NULL) + break; + } + + if (mem == NULL) { + ret = -ENOMEM; + goto done; + } + + for (i = 0; i < nbuffers; ++i) { + memset(&queue->buffer[i], 0, sizeof queue->buffer[i]); + queue->buffer[i].buf.index = i; + queue->buffer[i].buf.m.offset = i * bufsize; + queue->buffer[i].buf.length = buflength; + queue->buffer[i].buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + queue->buffer[i].buf.sequence = 0; + queue->buffer[i].buf.field = V4L2_FIELD_NONE; + queue->buffer[i].buf.memory = V4L2_MEMORY_MMAP; + queue->buffer[i].buf.flags = 0; + init_waitqueue_head(&queue->buffer[i].wait); + } + + queue->mem = mem; + queue->count = nbuffers; + queue->buf_size = bufsize; + ret = nbuffers; + +done: + mutex_unlock(&queue->mutex); + return ret; +} + +/* + * Free the video buffers. + * + * This function must be called with the queue lock held. + */ +int uvc_free_buffers(struct uvc_video_queue *queue) +{ + unsigned int i; + + for (i = 0; i < queue->count; ++i) { + if (queue->buffer[i].vma_use_count != 0) + return -EBUSY; + } + + if (queue->count) { + vfree(queue->mem); + queue->count = 0; + } + + return 0; +} + +static void __uvc_query_buffer(struct uvc_buffer *buf, + struct v4l2_buffer *v4l2_buf) +{ + memcpy(v4l2_buf, &buf->buf, sizeof *v4l2_buf); + + if (buf->vma_use_count) + v4l2_buf->flags |= V4L2_BUF_FLAG_MAPPED; + + switch (buf->state) { + case UVC_BUF_STATE_ERROR: + case UVC_BUF_STATE_DONE: + v4l2_buf->flags |= V4L2_BUF_FLAG_DONE; + break; + case UVC_BUF_STATE_QUEUED: + case UVC_BUF_STATE_ACTIVE: + v4l2_buf->flags |= V4L2_BUF_FLAG_QUEUED; + break; + case UVC_BUF_STATE_IDLE: + default: + break; + } +} + +int uvc_query_buffer(struct uvc_video_queue *queue, + struct v4l2_buffer *v4l2_buf) +{ + int ret = 0; + + mutex_lock(&queue->mutex); + if (v4l2_buf->index >= queue->count) { + ret = -EINVAL; + goto done; + } + + __uvc_query_buffer(&queue->buffer[v4l2_buf->index], v4l2_buf); + +done: + mutex_unlock(&queue->mutex); + return ret; +} + +/* + * Queue a video buffer. Attempting to queue a buffer that has already been + * queued will return -EINVAL. + */ +int uvc_queue_buffer(struct uvc_video_queue *queue, + struct v4l2_buffer *v4l2_buf) +{ + struct uvc_buffer *buf; + unsigned long flags; + int ret = 0; + + uvc_trace(UVC_TRACE_CAPTURE, "Queuing buffer %u.\n", v4l2_buf->index); + + if (v4l2_buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || + v4l2_buf->memory != V4L2_MEMORY_MMAP) { + uvc_trace(UVC_TRACE_CAPTURE, "[E] Invalid buffer type (%u) " + "and/or memory (%u).\n", v4l2_buf->type, + v4l2_buf->memory); + return -EINVAL; + } + + mutex_lock(&queue->mutex); + if (v4l2_buf->index >= queue->count) { + uvc_trace(UVC_TRACE_CAPTURE, "[E] Out of range index.\n"); + ret = -EINVAL; + goto done; + } + + buf = &queue->buffer[v4l2_buf->index]; + if (buf->state != UVC_BUF_STATE_IDLE) { + uvc_trace(UVC_TRACE_CAPTURE, "[E] Invalid buffer state " + "(%u).\n", buf->state); + ret = -EINVAL; + goto done; + } + + spin_lock_irqsave(&queue->irqlock, flags); + if (queue->flags & UVC_QUEUE_DISCONNECTED) { + spin_unlock_irqrestore(&queue->irqlock, flags); + ret = -ENODEV; + goto done; + } + buf->state = UVC_BUF_STATE_QUEUED; + buf->buf.bytesused = 0; + list_add_tail(&buf->stream, &queue->mainqueue); + list_add_tail(&buf->queue, &queue->irqqueue); + spin_unlock_irqrestore(&queue->irqlock, flags); + +done: + mutex_unlock(&queue->mutex); + return ret; +} + +static int uvc_queue_waiton(struct uvc_buffer *buf, int nonblocking) +{ + if (nonblocking) { + return (buf->state != UVC_BUF_STATE_QUEUED && + buf->state != UVC_BUF_STATE_ACTIVE) + ? 0 : -EAGAIN; + } + + return wait_event_interruptible(buf->wait, + buf->state != UVC_BUF_STATE_QUEUED && + buf->state != UVC_BUF_STATE_ACTIVE); +} + +/* + * Dequeue a video buffer. If nonblocking is false, block until a buffer is + * available. + */ +int uvc_dequeue_buffer(struct uvc_video_queue *queue, + struct v4l2_buffer *v4l2_buf, int nonblocking) +{ + struct uvc_buffer *buf; + int ret = 0; + + if (v4l2_buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || + v4l2_buf->memory != V4L2_MEMORY_MMAP) { + uvc_trace(UVC_TRACE_CAPTURE, "[E] Invalid buffer type (%u) " + "and/or memory (%u).\n", v4l2_buf->type, + v4l2_buf->memory); + return -EINVAL; + } + + mutex_lock(&queue->mutex); + if (list_empty(&queue->mainqueue)) { + uvc_trace(UVC_TRACE_CAPTURE, "[E] Empty buffer queue.\n"); + ret = -EINVAL; + goto done; + } + + buf = list_first_entry(&queue->mainqueue, struct uvc_buffer, stream); + if ((ret = uvc_queue_waiton(buf, nonblocking)) < 0) + goto done; + + uvc_trace(UVC_TRACE_CAPTURE, "Dequeuing buffer %u (%u, %u bytes).\n", + buf->buf.index, buf->state, buf->buf.bytesused); + + switch (buf->state) { + case UVC_BUF_STATE_ERROR: + uvc_trace(UVC_TRACE_CAPTURE, "[W] Corrupted data " + "(transmission error).\n"); + ret = -EIO; + case UVC_BUF_STATE_DONE: + buf->state = UVC_BUF_STATE_IDLE; + break; + + case UVC_BUF_STATE_IDLE: + case UVC_BUF_STATE_QUEUED: + case UVC_BUF_STATE_ACTIVE: + default: + uvc_trace(UVC_TRACE_CAPTURE, "[E] Invalid buffer state %u " + "(driver bug?).\n", buf->state); + ret = -EINVAL; + goto done; + } + + list_del(&buf->stream); + __uvc_query_buffer(buf, v4l2_buf); + +done: + mutex_unlock(&queue->mutex); + return ret; +} + +/* + * Poll the video queue. + * + * This function implements video queue polling and is intended to be used by + * the device poll handler. + */ +unsigned int uvc_queue_poll(struct uvc_video_queue *queue, struct file *file, + poll_table *wait) +{ + struct uvc_buffer *buf; + unsigned int mask = 0; + + mutex_lock(&queue->mutex); + if (list_empty(&queue->mainqueue)) { + mask |= POLLERR; + goto done; + } + buf = list_first_entry(&queue->mainqueue, struct uvc_buffer, stream); + + poll_wait(file, &buf->wait, wait); + if (buf->state == UVC_BUF_STATE_DONE || + buf->state == UVC_BUF_STATE_ERROR) + mask |= POLLIN | POLLRDNORM; + +done: + mutex_unlock(&queue->mutex); + return mask; +} + +/* + * Enable or disable the video buffers queue. + * + * The queue must be enabled before starting video acquisition and must be + * disabled after stopping it. This ensures that the video buffers queue + * state can be properly initialized before buffers are accessed from the + * interrupt handler. + * + * Enabling the video queue initializes parameters (such as sequence number, + * sync pattern, ...). If the queue is already enabled, return -EBUSY. + * + * Disabling the video queue cancels the queue and removes all buffers from + * the main queue. + * + * This function can't be called from interrupt context. Use + * uvc_queue_cancel() instead. + */ +int uvc_queue_enable(struct uvc_video_queue *queue, int enable) +{ + unsigned int i; + int ret = 0; + + mutex_lock(&queue->mutex); + if (enable) { + if (uvc_queue_streaming(queue)) { + ret = -EBUSY; + goto done; + } + queue->sequence = 0; + queue->flags |= UVC_QUEUE_STREAMING; + } else { + uvc_queue_cancel(queue, 0); + INIT_LIST_HEAD(&queue->mainqueue); + + for (i = 0; i < queue->count; ++i) + queue->buffer[i].state = UVC_BUF_STATE_IDLE; + + queue->flags &= ~UVC_QUEUE_STREAMING; + } + +done: + mutex_unlock(&queue->mutex); + return ret; +} + +/* + * Cancel the video buffers queue. + * + * Cancelling the queue marks all buffers on the irq queue as erroneous, + * wakes them up and remove them from the queue. + * + * If the disconnect parameter is set, further calls to uvc_queue_buffer will + * fail with -ENODEV. + * + * This function acquires the irq spinlock and can be called from interrupt + * context. + */ +void uvc_queue_cancel(struct uvc_video_queue *queue, int disconnect) +{ + struct uvc_buffer *buf; + unsigned long flags; + + spin_lock_irqsave(&queue->irqlock, flags); + while (!list_empty(&queue->irqqueue)) { + buf = list_first_entry(&queue->irqqueue, struct uvc_buffer, + queue); + list_del(&buf->queue); + buf->state = UVC_BUF_STATE_ERROR; + wake_up(&buf->wait); + } + /* This must be protected by the irqlock spinlock to avoid race + * conditions between uvc_queue_buffer and the disconnection event that + * could result in an interruptible wait in uvc_dequeue_buffer. Do not + * blindly replace this logic by checking for the UVC_DEV_DISCONNECTED + * state outside the queue code. + */ + if (disconnect) + queue->flags |= UVC_QUEUE_DISCONNECTED; + spin_unlock_irqrestore(&queue->irqlock, flags); +} + +struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue, + struct uvc_buffer *buf) +{ + struct uvc_buffer *nextbuf; + unsigned long flags; + + if ((queue->flags & UVC_QUEUE_DROP_INCOMPLETE) && + buf->buf.length != buf->buf.bytesused) { + buf->state = UVC_BUF_STATE_QUEUED; + buf->buf.bytesused = 0; + return buf; + } + + spin_lock_irqsave(&queue->irqlock, flags); + list_del(&buf->queue); + if (!list_empty(&queue->irqqueue)) + nextbuf = list_first_entry(&queue->irqqueue, struct uvc_buffer, + queue); + else + nextbuf = NULL; + spin_unlock_irqrestore(&queue->irqlock, flags); + + buf->buf.sequence = queue->sequence++; + do_gettimeofday(&buf->buf.timestamp); + + wake_up(&buf->wait); + return nextbuf; +} diff --git a/drivers/media/video/uvc/uvc_status.c b/drivers/media/video/uvc/uvc_status.c new file mode 100644 index 000000000000..be9084e5eace --- /dev/null +++ b/drivers/media/video/uvc/uvc_status.c @@ -0,0 +1,207 @@ +/* + * uvc_status.c -- USB Video Class driver - Status endpoint + * + * Copyright (C) 2007-2008 + * Laurent Pinchart ([email protected]) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/input.h> +#include <linux/usb.h> +#include <linux/usb/input.h> + +#include "uvcvideo.h" + +/* -------------------------------------------------------------------------- + * Input device + */ +static int uvc_input_init(struct uvc_device *dev) +{ + struct usb_device *udev = dev->udev; + struct input_dev *input; + char *phys = NULL; + int ret; + + input = input_allocate_device(); + if (input == NULL) + return -ENOMEM; + + phys = kmalloc(6 + strlen(udev->bus->bus_name) + strlen(udev->devpath), + GFP_KERNEL); + if (phys == NULL) { + ret = -ENOMEM; + goto error; + } + sprintf(phys, "usb-%s-%s", udev->bus->bus_name, udev->devpath); + + input->name = dev->name; + input->phys = phys; + usb_to_input_id(udev, &input->id); + input->dev.parent = &dev->intf->dev; + + set_bit(EV_KEY, input->evbit); + set_bit(BTN_0, input->keybit); + + if ((ret = input_register_device(input)) < 0) + goto error; + + dev->input = input; + return 0; + +error: + input_free_device(input); + kfree(phys); + return ret; +} + +static void uvc_input_cleanup(struct uvc_device *dev) +{ + if (dev->input) + input_unregister_device(dev->input); +} + +/* -------------------------------------------------------------------------- + * Status interrupt endpoint + */ +static void uvc_event_streaming(struct uvc_device *dev, __u8 *data, int len) +{ + if (len < 3) { + uvc_trace(UVC_TRACE_STATUS, "Invalid streaming status event " + "received.\n"); + return; + } + + if (data[2] == 0) { + if (len < 4) + return; + uvc_trace(UVC_TRACE_STATUS, "Button (intf %u) %s len %d\n", + data[1], data[3] ? "pressed" : "released", len); + if (dev->input) + input_report_key(dev->input, BTN_0, data[3]); + } else { + uvc_trace(UVC_TRACE_STATUS, "Stream %u error event %02x %02x " + "len %d.\n", data[1], data[2], data[3], len); + } +} + +static void uvc_event_control(struct uvc_device *dev, __u8 *data, int len) +{ + char *attrs[3] = { "value", "info", "failure" }; + + if (len < 6 || data[2] != 0 || data[4] > 2) { + uvc_trace(UVC_TRACE_STATUS, "Invalid control status event " + "received.\n"); + return; + } + + uvc_trace(UVC_TRACE_STATUS, "Control %u/%u %s change len %d.\n", + data[1], data[3], attrs[data[4]], len); +} + +static void uvc_status_complete(struct urb *urb) +{ + struct uvc_device *dev = urb->context; + int len, ret; + + switch (urb->status) { + case 0: + break; + + case -ENOENT: /* usb_kill_urb() called. */ + case -ECONNRESET: /* usb_unlink_urb() called. */ + case -ESHUTDOWN: /* The endpoint is being disabled. */ + case -EPROTO: /* Device is disconnected (reported by some + * host controller). */ + return; + + default: + uvc_printk(KERN_WARNING, "Non-zero status (%d) in status " + "completion handler.\n", urb->status); + return; + } + + len = urb->actual_length; + if (len > 0) { + switch (dev->status[0] & 0x0f) { + case UVC_STATUS_TYPE_CONTROL: + uvc_event_control(dev, dev->status, len); + break; + + case UVC_STATUS_TYPE_STREAMING: + uvc_event_streaming(dev, dev->status, len); + break; + + default: + uvc_printk(KERN_INFO, "unknown event type %u.\n", + dev->status[0]); + break; + } + } + + /* Resubmit the URB. */ + urb->interval = dev->int_ep->desc.bInterval; + if ((ret = usb_submit_urb(urb, GFP_ATOMIC)) < 0) { + uvc_printk(KERN_ERR, "Failed to resubmit status URB (%d).\n", + ret); + } +} + +int uvc_status_init(struct uvc_device *dev) +{ + struct usb_host_endpoint *ep = dev->int_ep; + unsigned int pipe; + int interval; + + if (ep == NULL) + return 0; + + uvc_input_init(dev); + + dev->int_urb = usb_alloc_urb(0, GFP_KERNEL); + if (dev->int_urb == NULL) + return -ENOMEM; + + pipe = usb_rcvintpipe(dev->udev, ep->desc.bEndpointAddress); + + /* For high-speed interrupt endpoints, the bInterval value is used as + * an exponent of two. Some developers forgot about it. + */ + interval = ep->desc.bInterval; + if (interval > 16 && dev->udev->speed == USB_SPEED_HIGH && + (dev->quirks & UVC_QUIRK_STATUS_INTERVAL)) + interval = fls(interval) - 1; + + usb_fill_int_urb(dev->int_urb, dev->udev, pipe, + dev->status, sizeof dev->status, uvc_status_complete, + dev, interval); + + return usb_submit_urb(dev->int_urb, GFP_KERNEL); +} + +void uvc_status_cleanup(struct uvc_device *dev) +{ + usb_kill_urb(dev->int_urb); + usb_free_urb(dev->int_urb); + uvc_input_cleanup(dev); +} + +int uvc_status_suspend(struct uvc_device *dev) +{ + usb_kill_urb(dev->int_urb); + return 0; +} + +int uvc_status_resume(struct uvc_device *dev) +{ + if (dev->int_urb == NULL) + return 0; + + return usb_submit_urb(dev->int_urb, GFP_KERNEL); +} diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c new file mode 100644 index 000000000000..2e0a66575bb4 --- /dev/null +++ b/drivers/media/video/uvc/uvc_v4l2.c @@ -0,0 +1,1105 @@ +/* + * uvc_v4l2.c -- USB Video Class driver - V4L2 API + * + * Copyright (C) 2005-2008 + * Laurent Pinchart ([email protected]) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/usb.h> +#include <linux/videodev2.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/wait.h> +#include <asm/atomic.h> + +#include <media/v4l2-common.h> + +#include "uvcvideo.h" + +/* ------------------------------------------------------------------------ + * V4L2 interface + */ + +/* + * Mapping V4L2 controls to UVC controls can be straighforward if done well. + * Most of the UVC controls exist in V4L2, and can be mapped directly. Some + * must be grouped (for instance the Red Balance, Blue Balance and Do White + * Balance V4L2 controls use the White Balance Component UVC control) or + * otherwise translated. The approach we take here is to use a translation + * table for the controls which can be mapped directly, and handle the others + * manually. + */ +static int uvc_v4l2_query_menu(struct uvc_video_device *video, + struct v4l2_querymenu *query_menu) +{ + struct uvc_menu_info *menu_info; + struct uvc_control_mapping *mapping; + struct uvc_control *ctrl; + + ctrl = uvc_find_control(video, query_menu->id, &mapping); + if (ctrl == NULL || mapping->v4l2_type != V4L2_CTRL_TYPE_MENU) + return -EINVAL; + + if (query_menu->index >= mapping->menu_count) + return -EINVAL; + + menu_info = &mapping->menu_info[query_menu->index]; + strncpy(query_menu->name, menu_info->name, 32); + return 0; +} + +/* + * Find the frame interval closest to the requested frame interval for the + * given frame format and size. This should be done by the device as part of + * the Video Probe and Commit negotiation, but some hardware don't implement + * that feature. + */ +static __u32 uvc_try_frame_interval(struct uvc_frame *frame, __u32 interval) +{ + unsigned int i; + + if (frame->bFrameIntervalType) { + __u32 best = -1, dist; + + for (i = 0; i < frame->bFrameIntervalType; ++i) { + dist = interval > frame->dwFrameInterval[i] + ? interval - frame->dwFrameInterval[i] + : frame->dwFrameInterval[i] - interval; + + if (dist > best) + break; + + best = dist; + } + + interval = frame->dwFrameInterval[i-1]; + } else { + const __u32 min = frame->dwFrameInterval[0]; + const __u32 max = frame->dwFrameInterval[1]; + const __u32 step = frame->dwFrameInterval[2]; + + interval = min + (interval - min + step/2) / step * step; + if (interval > max) + interval = max; + } + + return interval; +} + +static int uvc_v4l2_try_format(struct uvc_video_device *video, + struct v4l2_format *fmt, struct uvc_streaming_control *probe, + struct uvc_format **uvc_format, struct uvc_frame **uvc_frame) +{ + struct uvc_format *format = NULL; + struct uvc_frame *frame = NULL; + __u16 rw, rh; + unsigned int d, maxd; + unsigned int i; + __u32 interval; + int ret = 0; + __u8 *fcc; + + if (fmt->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + fcc = (__u8 *)&fmt->fmt.pix.pixelformat; + uvc_trace(UVC_TRACE_FORMAT, "Trying format 0x%08x (%c%c%c%c): %ux%u.\n", + fmt->fmt.pix.pixelformat, + fcc[0], fcc[1], fcc[2], fcc[3], + fmt->fmt.pix.width, fmt->fmt.pix.height); + + /* Check if the hardware supports the requested format. */ + for (i = 0; i < video->streaming->nformats; ++i) { + format = &video->streaming->format[i]; + if (format->fcc == fmt->fmt.pix.pixelformat) + break; + } + + if (format == NULL || format->fcc != fmt->fmt.pix.pixelformat) { + uvc_trace(UVC_TRACE_FORMAT, "Unsupported format 0x%08x.\n", + fmt->fmt.pix.pixelformat); + return -EINVAL; + } + + /* Find the closest image size. The distance between image sizes is + * the size in pixels of the non-overlapping regions between the + * requested size and the frame-specified size. + */ + rw = fmt->fmt.pix.width; + rh = fmt->fmt.pix.height; + maxd = (unsigned int)-1; + + for (i = 0; i < format->nframes; ++i) { + __u16 w = format->frame[i].wWidth; + __u16 h = format->frame[i].wHeight; + + d = min(w, rw) * min(h, rh); + d = w*h + rw*rh - 2*d; + if (d < maxd) { + maxd = d; + frame = &format->frame[i]; + } + + if (maxd == 0) + break; + } + + if (frame == NULL) { + uvc_trace(UVC_TRACE_FORMAT, "Unsupported size %ux%u.\n", + fmt->fmt.pix.width, fmt->fmt.pix.height); + return -EINVAL; + } + + /* Use the default frame interval. */ + interval = frame->dwDefaultFrameInterval; + uvc_trace(UVC_TRACE_FORMAT, "Using default frame interval %u.%u us " + "(%u.%u fps).\n", interval/10, interval%10, 10000000/interval, + (100000000/interval)%10); + + /* Set the format index, frame index and frame interval. */ + memset(probe, 0, sizeof *probe); + probe->bmHint = 1; /* dwFrameInterval */ + probe->bFormatIndex = format->index; + probe->bFrameIndex = frame->bFrameIndex; + probe->dwFrameInterval = uvc_try_frame_interval(frame, interval); + /* Some webcams stall the probe control set request when the + * dwMaxVideoFrameSize field is set to zero. The UVC specification + * clearly states that the field is read-only from the host, so this + * is a webcam bug. Set dwMaxVideoFrameSize to the value reported by + * the webcam to work around the problem. + * + * The workaround could probably be enabled for all webcams, so the + * quirk can be removed if needed. It's currently useful to detect + * webcam bugs and fix them before they hit the market (providing + * developers test their webcams with the Linux driver as well as with + * the Windows driver). + */ + if (video->dev->quirks & UVC_QUIRK_PROBE_EXTRAFIELDS) + probe->dwMaxVideoFrameSize = + video->streaming->ctrl.dwMaxVideoFrameSize; + + /* Probe the device */ + if ((ret = uvc_probe_video(video, probe)) < 0) + goto done; + + fmt->fmt.pix.width = frame->wWidth; + fmt->fmt.pix.height = frame->wHeight; + fmt->fmt.pix.field = V4L2_FIELD_NONE; + fmt->fmt.pix.bytesperline = format->bpp * frame->wWidth / 8; + fmt->fmt.pix.sizeimage = probe->dwMaxVideoFrameSize; + fmt->fmt.pix.colorspace = format->colorspace; + fmt->fmt.pix.priv = 0; + + if (uvc_format != NULL) + *uvc_format = format; + if (uvc_frame != NULL) + *uvc_frame = frame; + +done: + return ret; +} + +static int uvc_v4l2_get_format(struct uvc_video_device *video, + struct v4l2_format *fmt) +{ + struct uvc_format *format = video->streaming->cur_format; + struct uvc_frame *frame = video->streaming->cur_frame; + + if (fmt->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + if (format == NULL || frame == NULL) + return -EINVAL; + + fmt->fmt.pix.pixelformat = format->fcc; + fmt->fmt.pix.width = frame->wWidth; + fmt->fmt.pix.height = frame->wHeight; + fmt->fmt.pix.field = V4L2_FIELD_NONE; + fmt->fmt.pix.bytesperline = format->bpp * frame->wWidth / 8; + fmt->fmt.pix.sizeimage = video->streaming->ctrl.dwMaxVideoFrameSize; + fmt->fmt.pix.colorspace = format->colorspace; + fmt->fmt.pix.priv = 0; + + return 0; +} + +static int uvc_v4l2_set_format(struct uvc_video_device *video, + struct v4l2_format *fmt) +{ + struct uvc_streaming_control probe; + struct uvc_format *format; + struct uvc_frame *frame; + int ret; + + if (fmt->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + if (uvc_queue_streaming(&video->queue)) + return -EBUSY; + + ret = uvc_v4l2_try_format(video, fmt, &probe, &format, &frame); + if (ret < 0) + return ret; + + if ((ret = uvc_set_video_ctrl(video, &probe, 0)) < 0) + return ret; + + memcpy(&video->streaming->ctrl, &probe, sizeof probe); + video->streaming->cur_format = format; + video->streaming->cur_frame = frame; + + return 0; +} + +static int uvc_v4l2_get_streamparm(struct uvc_video_device *video, + struct v4l2_streamparm *parm) +{ + uint32_t numerator, denominator; + + if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + numerator = video->streaming->ctrl.dwFrameInterval; + denominator = 10000000; + uvc_simplify_fraction(&numerator, &denominator, 8, 333); + + memset(parm, 0, sizeof *parm); + parm->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; + parm->parm.capture.capturemode = 0; + parm->parm.capture.timeperframe.numerator = numerator; + parm->parm.capture.timeperframe.denominator = denominator; + parm->parm.capture.extendedmode = 0; + parm->parm.capture.readbuffers = 0; + + return 0; +} + +static int uvc_v4l2_set_streamparm(struct uvc_video_device *video, + struct v4l2_streamparm *parm) +{ + struct uvc_frame *frame = video->streaming->cur_frame; + struct uvc_streaming_control probe; + uint32_t interval; + int ret; + + if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + if (uvc_queue_streaming(&video->queue)) + return -EBUSY; + + memcpy(&probe, &video->streaming->ctrl, sizeof probe); + interval = uvc_fraction_to_interval( + parm->parm.capture.timeperframe.numerator, + parm->parm.capture.timeperframe.denominator); + + uvc_trace(UVC_TRACE_FORMAT, "Setting frame interval to %u/%u (%u).\n", + parm->parm.capture.timeperframe.numerator, + parm->parm.capture.timeperframe.denominator, + interval); + probe.dwFrameInterval = uvc_try_frame_interval(frame, interval); + + /* Probe the device with the new settings. */ + if ((ret = uvc_probe_video(video, &probe)) < 0) + return ret; + + /* Commit the new settings. */ + if ((ret = uvc_set_video_ctrl(video, &probe, 0)) < 0) + return ret; + + memcpy(&video->streaming->ctrl, &probe, sizeof probe); + + /* Return the actual frame period. */ + parm->parm.capture.timeperframe.numerator = probe.dwFrameInterval; + parm->parm.capture.timeperframe.denominator = 10000000; + uvc_simplify_fraction(&parm->parm.capture.timeperframe.numerator, + &parm->parm.capture.timeperframe.denominator, + 8, 333); + + return 0; +} + +/* ------------------------------------------------------------------------ + * Privilege management + */ + +/* + * Privilege management is the multiple-open implementation basis. The current + * implementation is completely transparent for the end-user and doesn't + * require explicit use of the VIDIOC_G_PRIORITY and VIDIOC_S_PRIORITY ioctls. + * Those ioctls enable finer control on the device (by making possible for a + * user to request exclusive access to a device), but are not mature yet. + * Switching to the V4L2 priority mechanism might be considered in the future + * if this situation changes. + * + * Each open instance of a UVC device can either be in a privileged or + * unprivileged state. Only a single instance can be in a privileged state at + * a given time. Trying to perform an operation which requires privileges will + * automatically acquire the required privileges if possible, or return -EBUSY + * otherwise. Privileges are dismissed when closing the instance. + * + * Operations which require privileges are: + * + * - VIDIOC_S_INPUT + * - VIDIOC_S_PARM + * - VIDIOC_S_FMT + * - VIDIOC_TRY_FMT + * - VIDIOC_REQBUFS + */ +static int uvc_acquire_privileges(struct uvc_fh *handle) +{ + int ret = 0; + + /* Always succeed if the handle is already privileged. */ + if (handle->state == UVC_HANDLE_ACTIVE) + return 0; + + /* Check if the device already has a privileged handle. */ + mutex_lock(&uvc_driver.open_mutex); + if (atomic_inc_return(&handle->device->active) != 1) { + atomic_dec(&handle->device->active); + ret = -EBUSY; + goto done; + } + + handle->state = UVC_HANDLE_ACTIVE; + +done: + mutex_unlock(&uvc_driver.open_mutex); + return ret; +} + +static void uvc_dismiss_privileges(struct uvc_fh *handle) +{ + if (handle->state == UVC_HANDLE_ACTIVE) + atomic_dec(&handle->device->active); + + handle->state = UVC_HANDLE_PASSIVE; +} + +static int uvc_has_privileges(struct uvc_fh *handle) +{ + return handle->state == UVC_HANDLE_ACTIVE; +} + +/* ------------------------------------------------------------------------ + * V4L2 file operations + */ + +static int uvc_v4l2_open(struct inode *inode, struct file *file) +{ + struct video_device *vdev; + struct uvc_video_device *video; + struct uvc_fh *handle; + int ret = 0; + + uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_open\n"); + mutex_lock(&uvc_driver.open_mutex); + vdev = video_devdata(file); + video = video_get_drvdata(vdev); + + if (video->dev->state & UVC_DEV_DISCONNECTED) { + ret = -ENODEV; + goto done; + } + + ret = usb_autopm_get_interface(video->dev->intf); + if (ret < 0) + goto done; + + /* Create the device handle. */ + handle = kzalloc(sizeof *handle, GFP_KERNEL); + if (handle == NULL) { + usb_autopm_put_interface(video->dev->intf); + ret = -ENOMEM; + goto done; + } + + handle->device = video; + handle->state = UVC_HANDLE_PASSIVE; + file->private_data = handle; + + kref_get(&video->dev->kref); + +done: + mutex_unlock(&uvc_driver.open_mutex); + return ret; +} + +static int uvc_v4l2_release(struct inode *inode, struct file *file) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_video_device *video = video_get_drvdata(vdev); + struct uvc_fh *handle = (struct uvc_fh *)file->private_data; + + uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_release\n"); + + /* Only free resources if this is a privileged handle. */ + if (uvc_has_privileges(handle)) { + uvc_video_enable(video, 0); + + mutex_lock(&video->queue.mutex); + if (uvc_free_buffers(&video->queue) < 0) + uvc_printk(KERN_ERR, "uvc_v4l2_release: Unable to " + "free buffers.\n"); + mutex_unlock(&video->queue.mutex); + } + + /* Release the file handle. */ + uvc_dismiss_privileges(handle); + kfree(handle); + file->private_data = NULL; + + usb_autopm_put_interface(video->dev->intf); + kref_put(&video->dev->kref, uvc_delete); + return 0; +} + +static int uvc_v4l2_do_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, void *arg) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_video_device *video = video_get_drvdata(vdev); + struct uvc_fh *handle = (struct uvc_fh *)file->private_data; + int ret = 0; + + if (uvc_trace_param & UVC_TRACE_IOCTL) + v4l_printk_ioctl(cmd); + + switch (cmd) { + /* Query capabilities */ + case VIDIOC_QUERYCAP: + { + struct v4l2_capability *cap = arg; + + memset(cap, 0, sizeof *cap); + strncpy(cap->driver, "uvcvideo", sizeof cap->driver); + strncpy(cap->card, vdev->name, 32); + strncpy(cap->bus_info, video->dev->udev->bus->bus_name, + sizeof cap->bus_info); + cap->version = DRIVER_VERSION_NUMBER; + cap->capabilities = V4L2_CAP_VIDEO_CAPTURE + | V4L2_CAP_STREAMING; + break; + } + + /* Get, Set & Query control */ + case VIDIOC_QUERYCTRL: + return uvc_query_v4l2_ctrl(video, arg); + + case VIDIOC_G_CTRL: + { + struct v4l2_control *ctrl = arg; + struct v4l2_ext_control xctrl; + + memset(&xctrl, 0, sizeof xctrl); + xctrl.id = ctrl->id; + + uvc_ctrl_begin(video); + ret = uvc_ctrl_get(video, &xctrl); + uvc_ctrl_rollback(video); + if (ret >= 0) + ctrl->value = xctrl.value; + break; + } + + case VIDIOC_S_CTRL: + { + struct v4l2_control *ctrl = arg; + struct v4l2_ext_control xctrl; + + memset(&xctrl, 0, sizeof xctrl); + xctrl.id = ctrl->id; + xctrl.value = ctrl->value; + + uvc_ctrl_begin(video); + ret = uvc_ctrl_set(video, &xctrl); + if (ret < 0) { + uvc_ctrl_rollback(video); + return ret; + } + ret = uvc_ctrl_commit(video); + break; + } + + case VIDIOC_QUERYMENU: + return uvc_v4l2_query_menu(video, arg); + + case VIDIOC_G_EXT_CTRLS: + { + struct v4l2_ext_controls *ctrls = arg; + struct v4l2_ext_control *ctrl = ctrls->controls; + unsigned int i; + + uvc_ctrl_begin(video); + for (i = 0; i < ctrls->count; ++ctrl, ++i) { + ret = uvc_ctrl_get(video, ctrl); + if (ret < 0) { + uvc_ctrl_rollback(video); + ctrls->error_idx = i; + return ret; + } + } + ctrls->error_idx = 0; + ret = uvc_ctrl_rollback(video); + break; + } + + case VIDIOC_S_EXT_CTRLS: + case VIDIOC_TRY_EXT_CTRLS: + { + struct v4l2_ext_controls *ctrls = arg; + struct v4l2_ext_control *ctrl = ctrls->controls; + unsigned int i; + + ret = uvc_ctrl_begin(video); + if (ret < 0) + return ret; + + for (i = 0; i < ctrls->count; ++ctrl, ++i) { + ret = uvc_ctrl_set(video, ctrl); + if (ret < 0) { + uvc_ctrl_rollback(video); + ctrls->error_idx = i; + return ret; + } + } + + ctrls->error_idx = 0; + + if (cmd == VIDIOC_S_EXT_CTRLS) + ret = uvc_ctrl_commit(video); + else + ret = uvc_ctrl_rollback(video); + break; + } + + /* Get, Set & Enum input */ + case VIDIOC_ENUMINPUT: + { + const struct uvc_entity *selector = video->selector; + struct v4l2_input *input = arg; + struct uvc_entity *iterm = NULL; + u32 index = input->index; + int pin = 0; + + if (selector == NULL || + (video->dev->quirks & UVC_QUIRK_IGNORE_SELECTOR_UNIT)) { + if (index != 0) + return -EINVAL; + iterm = list_first_entry(&video->iterms, + struct uvc_entity, chain); + pin = iterm->id; + } else if (pin < selector->selector.bNrInPins) { + pin = selector->selector.baSourceID[index]; + list_for_each_entry(iterm, video->iterms.next, chain) { + if (iterm->id == pin) + break; + } + } + + if (iterm == NULL || iterm->id != pin) + return -EINVAL; + + memset(input, 0, sizeof *input); + input->index = index; + strncpy(input->name, iterm->name, sizeof input->name); + if (UVC_ENTITY_TYPE(iterm) == ITT_CAMERA) + input->type = V4L2_INPUT_TYPE_CAMERA; + break; + } + + case VIDIOC_G_INPUT: + { + u8 input; + + if (video->selector == NULL || + (video->dev->quirks & UVC_QUIRK_IGNORE_SELECTOR_UNIT)) { + *(int *)arg = 0; + break; + } + + ret = uvc_query_ctrl(video->dev, GET_CUR, video->selector->id, + video->dev->intfnum, SU_INPUT_SELECT_CONTROL, + &input, 1); + if (ret < 0) + return ret; + + *(int *)arg = input - 1; + break; + } + + case VIDIOC_S_INPUT: + { + u8 input = *(u32 *)arg + 1; + + if ((ret = uvc_acquire_privileges(handle)) < 0) + return ret; + + if (video->selector == NULL || + (video->dev->quirks & UVC_QUIRK_IGNORE_SELECTOR_UNIT)) { + if (input != 1) + return -EINVAL; + break; + } + + if (input > video->selector->selector.bNrInPins) + return -EINVAL; + + return uvc_query_ctrl(video->dev, SET_CUR, video->selector->id, + video->dev->intfnum, SU_INPUT_SELECT_CONTROL, + &input, 1); + } + + /* Try, Get, Set & Enum format */ + case VIDIOC_ENUM_FMT: + { + struct v4l2_fmtdesc *fmt = arg; + struct uvc_format *format; + + if (fmt->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || + fmt->index >= video->streaming->nformats) + return -EINVAL; + + format = &video->streaming->format[fmt->index]; + fmt->flags = 0; + if (format->flags & UVC_FMT_FLAG_COMPRESSED) + fmt->flags |= V4L2_FMT_FLAG_COMPRESSED; + strncpy(fmt->description, format->name, + sizeof fmt->description); + fmt->description[sizeof fmt->description - 1] = 0; + fmt->pixelformat = format->fcc; + break; + } + + case VIDIOC_TRY_FMT: + { + struct uvc_streaming_control probe; + + if ((ret = uvc_acquire_privileges(handle)) < 0) + return ret; + + return uvc_v4l2_try_format(video, arg, &probe, NULL, NULL); + } + + case VIDIOC_S_FMT: + if ((ret = uvc_acquire_privileges(handle)) < 0) + return ret; + + return uvc_v4l2_set_format(video, arg); + + case VIDIOC_G_FMT: + return uvc_v4l2_get_format(video, arg); + + /* Frame size enumeration */ + case VIDIOC_ENUM_FRAMESIZES: + { + struct v4l2_frmsizeenum *fsize = arg; + struct uvc_format *format = NULL; + struct uvc_frame *frame; + int i; + + /* Look for the given pixel format */ + for (i = 0; i < video->streaming->nformats; i++) { + if (video->streaming->format[i].fcc == + fsize->pixel_format) { + format = &video->streaming->format[i]; + break; + } + } + if (format == NULL) + return -EINVAL; + + if (fsize->index >= format->nframes) + return -EINVAL; + + frame = &format->frame[fsize->index]; + fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; + fsize->discrete.width = frame->wWidth; + fsize->discrete.height = frame->wHeight; + break; + } + + /* Frame interval enumeration */ + case VIDIOC_ENUM_FRAMEINTERVALS: + { + struct v4l2_frmivalenum *fival = arg; + struct uvc_format *format = NULL; + struct uvc_frame *frame = NULL; + int i; + + /* Look for the given pixel format and frame size */ + for (i = 0; i < video->streaming->nformats; i++) { + if (video->streaming->format[i].fcc == + fival->pixel_format) { + format = &video->streaming->format[i]; + break; + } + } + if (format == NULL) + return -EINVAL; + + for (i = 0; i < format->nframes; i++) { + if (format->frame[i].wWidth == fival->width && + format->frame[i].wHeight == fival->height) { + frame = &format->frame[i]; + break; + } + } + if (frame == NULL) + return -EINVAL; + + if (frame->bFrameIntervalType) { + if (fival->index >= frame->bFrameIntervalType) + return -EINVAL; + + fival->type = V4L2_FRMIVAL_TYPE_DISCRETE; + fival->discrete.numerator = + frame->dwFrameInterval[fival->index]; + fival->discrete.denominator = 10000000; + uvc_simplify_fraction(&fival->discrete.numerator, + &fival->discrete.denominator, 8, 333); + } else { + fival->type = V4L2_FRMIVAL_TYPE_STEPWISE; + fival->stepwise.min.numerator = + frame->dwFrameInterval[0]; + fival->stepwise.min.denominator = 10000000; + fival->stepwise.max.numerator = + frame->dwFrameInterval[1]; + fival->stepwise.max.denominator = 10000000; + fival->stepwise.step.numerator = + frame->dwFrameInterval[2]; + fival->stepwise.step.denominator = 10000000; + uvc_simplify_fraction(&fival->stepwise.min.numerator, + &fival->stepwise.min.denominator, 8, 333); + uvc_simplify_fraction(&fival->stepwise.max.numerator, + &fival->stepwise.max.denominator, 8, 333); + uvc_simplify_fraction(&fival->stepwise.step.numerator, + &fival->stepwise.step.denominator, 8, 333); + } + break; + } + + /* Get & Set streaming parameters */ + case VIDIOC_G_PARM: + return uvc_v4l2_get_streamparm(video, arg); + + case VIDIOC_S_PARM: + if ((ret = uvc_acquire_privileges(handle)) < 0) + return ret; + + return uvc_v4l2_set_streamparm(video, arg); + + /* Cropping and scaling */ + case VIDIOC_CROPCAP: + { + struct v4l2_cropcap *ccap = arg; + struct uvc_frame *frame = video->streaming->cur_frame; + + if (ccap->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + ccap->bounds.left = 0; + ccap->bounds.top = 0; + ccap->bounds.width = frame->wWidth; + ccap->bounds.height = frame->wHeight; + + ccap->defrect = ccap->bounds; + + ccap->pixelaspect.numerator = 1; + ccap->pixelaspect.denominator = 1; + break; + } + + case VIDIOC_G_CROP: + case VIDIOC_S_CROP: + return -EINVAL; + + /* Buffers & streaming */ + case VIDIOC_REQBUFS: + { + struct v4l2_requestbuffers *rb = arg; + unsigned int bufsize = + video->streaming->ctrl.dwMaxVideoFrameSize; + + if (rb->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || + rb->memory != V4L2_MEMORY_MMAP) + return -EINVAL; + + if ((ret = uvc_acquire_privileges(handle)) < 0) + return ret; + + ret = uvc_alloc_buffers(&video->queue, rb->count, bufsize); + if (ret < 0) + return ret; + + if (!(video->streaming->cur_format->flags & + UVC_FMT_FLAG_COMPRESSED)) + video->queue.flags |= UVC_QUEUE_DROP_INCOMPLETE; + + rb->count = ret; + ret = 0; + break; + } + + case VIDIOC_QUERYBUF: + { + struct v4l2_buffer *buf = arg; + + if (buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + if (!uvc_has_privileges(handle)) + return -EBUSY; + + return uvc_query_buffer(&video->queue, buf); + } + + case VIDIOC_QBUF: + if (!uvc_has_privileges(handle)) + return -EBUSY; + + return uvc_queue_buffer(&video->queue, arg); + + case VIDIOC_DQBUF: + if (!uvc_has_privileges(handle)) + return -EBUSY; + + return uvc_dequeue_buffer(&video->queue, arg, + file->f_flags & O_NONBLOCK); + + case VIDIOC_STREAMON: + { + int *type = arg; + + if (*type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + if (!uvc_has_privileges(handle)) + return -EBUSY; + + if ((ret = uvc_video_enable(video, 1)) < 0) + return ret; + break; + } + + case VIDIOC_STREAMOFF: + { + int *type = arg; + + if (*type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + if (!uvc_has_privileges(handle)) + return -EBUSY; + + return uvc_video_enable(video, 0); + } + + /* Analog video standards make no sense for digital cameras. */ + case VIDIOC_ENUMSTD: + case VIDIOC_QUERYSTD: + case VIDIOC_G_STD: + case VIDIOC_S_STD: + + case VIDIOC_OVERLAY: + + case VIDIOC_ENUMAUDIO: + case VIDIOC_ENUMAUDOUT: + + case VIDIOC_ENUMOUTPUT: + uvc_trace(UVC_TRACE_IOCTL, "Unsupported ioctl 0x%08x\n", cmd); + return -EINVAL; + + /* Dynamic controls. */ + case UVCIOC_CTRL_ADD: + { + struct uvc_xu_control_info *xinfo = arg; + struct uvc_control_info *info; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + info = kmalloc(sizeof *info, GFP_KERNEL); + if (info == NULL) + return -ENOMEM; + + memcpy(info->entity, xinfo->entity, sizeof info->entity); + info->index = xinfo->index; + info->selector = xinfo->selector; + info->size = xinfo->size; + info->flags = xinfo->flags; + + info->flags |= UVC_CONTROL_GET_MIN | UVC_CONTROL_GET_MAX | + UVC_CONTROL_GET_RES | UVC_CONTROL_GET_DEF; + + ret = uvc_ctrl_add_info(info); + if (ret < 0) + kfree(info); + break; + } + + case UVCIOC_CTRL_MAP: + { + struct uvc_xu_control_mapping *xmap = arg; + struct uvc_control_mapping *map; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + map = kmalloc(sizeof *map, GFP_KERNEL); + if (map == NULL) + return -ENOMEM; + + map->id = xmap->id; + memcpy(map->name, xmap->name, sizeof map->name); + memcpy(map->entity, xmap->entity, sizeof map->entity); + map->selector = xmap->selector; + map->size = xmap->size; + map->offset = xmap->offset; + map->v4l2_type = xmap->v4l2_type; + map->data_type = xmap->data_type; + + ret = uvc_ctrl_add_mapping(map); + if (ret < 0) + kfree(map); + break; + } + + case UVCIOC_CTRL_GET: + return uvc_xu_ctrl_query(video, arg, 0); + + case UVCIOC_CTRL_SET: + return uvc_xu_ctrl_query(video, arg, 1); + + default: + if ((ret = v4l_compat_translate_ioctl(inode, file, cmd, arg, + uvc_v4l2_do_ioctl)) == -ENOIOCTLCMD) + uvc_trace(UVC_TRACE_IOCTL, "Unknown ioctl 0x%08x\n", + cmd); + return ret; + } + + return ret; +} + +static int uvc_v4l2_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_ioctl\n"); + return video_usercopy(inode, file, cmd, arg, uvc_v4l2_do_ioctl); +} + +static ssize_t uvc_v4l2_read(struct file *file, char __user *data, + size_t count, loff_t *ppos) +{ + uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_read: not implemented.\n"); + return -ENODEV; +} + +/* + * VMA operations. + */ +static void uvc_vm_open(struct vm_area_struct *vma) +{ + struct uvc_buffer *buffer = vma->vm_private_data; + buffer->vma_use_count++; +} + +static void uvc_vm_close(struct vm_area_struct *vma) +{ + struct uvc_buffer *buffer = vma->vm_private_data; + buffer->vma_use_count--; +} + +static struct vm_operations_struct uvc_vm_ops = { + .open = uvc_vm_open, + .close = uvc_vm_close, +}; + +static int uvc_v4l2_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_video_device *video = video_get_drvdata(vdev); + struct uvc_buffer *buffer; + struct page *page; + unsigned long addr, start, size; + unsigned int i; + int ret = 0; + + uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_mmap\n"); + + start = vma->vm_start; + size = vma->vm_end - vma->vm_start; + + mutex_lock(&video->queue.mutex); + + for (i = 0; i < video->queue.count; ++i) { + buffer = &video->queue.buffer[i]; + if ((buffer->buf.m.offset >> PAGE_SHIFT) == vma->vm_pgoff) + break; + } + + if (i == video->queue.count || size != video->queue.buf_size) { + ret = -EINVAL; + goto done; + } + + /* + * VM_IO marks the area as being an mmaped region for I/O to a + * device. It also prevents the region from being core dumped. + */ + vma->vm_flags |= VM_IO; + + addr = (unsigned long)video->queue.mem + buffer->buf.m.offset; + while (size > 0) { + page = vmalloc_to_page((void *)addr); + if ((ret = vm_insert_page(vma, start, page)) < 0) + goto done; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &uvc_vm_ops; + vma->vm_private_data = buffer; + uvc_vm_open(vma); + +done: + mutex_unlock(&video->queue.mutex); + return ret; +} + +static unsigned int uvc_v4l2_poll(struct file *file, poll_table *wait) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_video_device *video = video_get_drvdata(vdev); + + uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_poll\n"); + + return uvc_queue_poll(&video->queue, file, wait); +} + +struct file_operations uvc_fops = { + .owner = THIS_MODULE, + .open = uvc_v4l2_open, + .release = uvc_v4l2_release, + .ioctl = uvc_v4l2_ioctl, + .compat_ioctl = v4l_compat_ioctl32, + .llseek = no_llseek, + .read = uvc_v4l2_read, + .mmap = uvc_v4l2_mmap, + .poll = uvc_v4l2_poll, +}; diff --git a/drivers/media/video/uvc/uvc_video.c b/drivers/media/video/uvc/uvc_video.c new file mode 100644 index 000000000000..6faf1fb21614 --- /dev/null +++ b/drivers/media/video/uvc/uvc_video.c @@ -0,0 +1,934 @@ +/* + * uvc_video.c -- USB Video Class driver - Video handling + * + * Copyright (C) 2005-2008 + * Laurent Pinchart ([email protected]) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/usb.h> +#include <linux/videodev2.h> +#include <linux/vmalloc.h> +#include <linux/wait.h> +#include <asm/atomic.h> +#include <asm/unaligned.h> + +#include <media/v4l2-common.h> + +#include "uvcvideo.h" + +/* ------------------------------------------------------------------------ + * UVC Controls + */ + +static int __uvc_query_ctrl(struct uvc_device *dev, __u8 query, __u8 unit, + __u8 intfnum, __u8 cs, void *data, __u16 size, + int timeout) +{ + __u8 type = USB_TYPE_CLASS | USB_RECIP_INTERFACE; + unsigned int pipe; + int ret; + + pipe = (query & 0x80) ? usb_rcvctrlpipe(dev->udev, 0) + : usb_sndctrlpipe(dev->udev, 0); + type |= (query & 0x80) ? USB_DIR_IN : USB_DIR_OUT; + + ret = usb_control_msg(dev->udev, pipe, query, type, cs << 8, + unit << 8 | intfnum, data, size, timeout); + + if (ret != size) { + uvc_printk(KERN_ERR, "Failed to query (%u) UVC control %u " + "(unit %u) : %d (exp. %u).\n", query, cs, unit, ret, + size); + return -EIO; + } + + return 0; +} + +int uvc_query_ctrl(struct uvc_device *dev, __u8 query, __u8 unit, + __u8 intfnum, __u8 cs, void *data, __u16 size) +{ + return __uvc_query_ctrl(dev, query, unit, intfnum, cs, data, size, + UVC_CTRL_CONTROL_TIMEOUT); +} + +static void uvc_fixup_buffer_size(struct uvc_video_device *video, + struct uvc_streaming_control *ctrl) +{ + struct uvc_format *format; + struct uvc_frame *frame; + + if (ctrl->bFormatIndex <= 0 || + ctrl->bFormatIndex > video->streaming->nformats) + return; + + format = &video->streaming->format[ctrl->bFormatIndex - 1]; + + if (ctrl->bFrameIndex <= 0 || + ctrl->bFrameIndex > format->nframes) + return; + + frame = &format->frame[ctrl->bFrameIndex - 1]; + + if (!(format->flags & UVC_FMT_FLAG_COMPRESSED) || + (ctrl->dwMaxVideoFrameSize == 0 && + video->dev->uvc_version < 0x0110)) + ctrl->dwMaxVideoFrameSize = + frame->dwMaxVideoFrameBufferSize; +} + +static int uvc_get_video_ctrl(struct uvc_video_device *video, + struct uvc_streaming_control *ctrl, int probe, __u8 query) +{ + __u8 data[34]; + __u8 size; + int ret; + + size = video->dev->uvc_version >= 0x0110 ? 34 : 26; + ret = __uvc_query_ctrl(video->dev, query, 0, video->streaming->intfnum, + probe ? VS_PROBE_CONTROL : VS_COMMIT_CONTROL, &data, size, + UVC_CTRL_STREAMING_TIMEOUT); + + if (ret < 0) + return ret; + + ctrl->bmHint = le16_to_cpup((__le16 *)&data[0]); + ctrl->bFormatIndex = data[2]; + ctrl->bFrameIndex = data[3]; + ctrl->dwFrameInterval = le32_to_cpup((__le32 *)&data[4]); + ctrl->wKeyFrameRate = le16_to_cpup((__le16 *)&data[8]); + ctrl->wPFrameRate = le16_to_cpup((__le16 *)&data[10]); + ctrl->wCompQuality = le16_to_cpup((__le16 *)&data[12]); + ctrl->wCompWindowSize = le16_to_cpup((__le16 *)&data[14]); + ctrl->wDelay = le16_to_cpup((__le16 *)&data[16]); + ctrl->dwMaxVideoFrameSize = + le32_to_cpu(get_unaligned((__le32 *)&data[18])); + ctrl->dwMaxPayloadTransferSize = + le32_to_cpu(get_unaligned((__le32 *)&data[22])); + + if (size == 34) { + ctrl->dwClockFrequency = + le32_to_cpu(get_unaligned((__le32 *)&data[26])); + ctrl->bmFramingInfo = data[30]; + ctrl->bPreferedVersion = data[31]; + ctrl->bMinVersion = data[32]; + ctrl->bMaxVersion = data[33]; + } else { + ctrl->dwClockFrequency = video->dev->clock_frequency; + ctrl->bmFramingInfo = 0; + ctrl->bPreferedVersion = 0; + ctrl->bMinVersion = 0; + ctrl->bMaxVersion = 0; + } + + /* Some broken devices return a null or wrong dwMaxVideoFrameSize. + * Try to get the value from the format and frame descriptor. + */ + uvc_fixup_buffer_size(video, ctrl); + + return 0; +} + +int uvc_set_video_ctrl(struct uvc_video_device *video, + struct uvc_streaming_control *ctrl, int probe) +{ + __u8 data[34]; + __u8 size; + + size = video->dev->uvc_version >= 0x0110 ? 34 : 26; + memset(data, 0, sizeof data); + + *(__le16 *)&data[0] = cpu_to_le16(ctrl->bmHint); + data[2] = ctrl->bFormatIndex; + data[3] = ctrl->bFrameIndex; + *(__le32 *)&data[4] = cpu_to_le32(ctrl->dwFrameInterval); + *(__le16 *)&data[8] = cpu_to_le16(ctrl->wKeyFrameRate); + *(__le16 *)&data[10] = cpu_to_le16(ctrl->wPFrameRate); + *(__le16 *)&data[12] = cpu_to_le16(ctrl->wCompQuality); + *(__le16 *)&data[14] = cpu_to_le16(ctrl->wCompWindowSize); + *(__le16 *)&data[16] = cpu_to_le16(ctrl->wDelay); + /* Note: Some of the fields below are not required for IN devices (see + * UVC spec, 4.3.1.1), but we still copy them in case support for OUT + * devices is added in the future. */ + put_unaligned(cpu_to_le32(ctrl->dwMaxVideoFrameSize), + (__le32 *)&data[18]); + put_unaligned(cpu_to_le32(ctrl->dwMaxPayloadTransferSize), + (__le32 *)&data[22]); + + if (size == 34) { + put_unaligned(cpu_to_le32(ctrl->dwClockFrequency), + (__le32 *)&data[26]); + data[30] = ctrl->bmFramingInfo; + data[31] = ctrl->bPreferedVersion; + data[32] = ctrl->bMinVersion; + data[33] = ctrl->bMaxVersion; + } + + return __uvc_query_ctrl(video->dev, SET_CUR, 0, + video->streaming->intfnum, + probe ? VS_PROBE_CONTROL : VS_COMMIT_CONTROL, &data, size, + UVC_CTRL_STREAMING_TIMEOUT); +} + +int uvc_probe_video(struct uvc_video_device *video, + struct uvc_streaming_control *probe) +{ + struct uvc_streaming_control probe_min, probe_max; + __u16 bandwidth; + unsigned int i; + int ret; + + mutex_lock(&video->streaming->mutex); + + /* Perform probing. The device should adjust the requested values + * according to its capabilities. However, some devices, namely the + * first generation UVC Logitech webcams, don't implement the Video + * Probe control properly, and just return the needed bandwidth. For + * that reason, if the needed bandwidth exceeds the maximum available + * bandwidth, try to lower the quality. + */ + if ((ret = uvc_set_video_ctrl(video, probe, 1)) < 0) + goto done; + + /* Get the minimum and maximum values for compression settings. */ + if (!(video->dev->quirks & UVC_QUIRK_PROBE_MINMAX)) { + ret = uvc_get_video_ctrl(video, &probe_min, 1, GET_MIN); + if (ret < 0) + goto done; + ret = uvc_get_video_ctrl(video, &probe_max, 1, GET_MAX); + if (ret < 0) + goto done; + + probe->wCompQuality = probe_max.wCompQuality; + } + + for (i = 0; i < 2; ++i) { + if ((ret = uvc_set_video_ctrl(video, probe, 1)) < 0 || + (ret = uvc_get_video_ctrl(video, probe, 1, GET_CUR)) < 0) + goto done; + + if (video->streaming->intf->num_altsetting == 1) + break; + + bandwidth = probe->dwMaxPayloadTransferSize; + if (bandwidth <= video->streaming->maxpsize) + break; + + if (video->dev->quirks & UVC_QUIRK_PROBE_MINMAX) { + ret = -ENOSPC; + goto done; + } + + /* TODO: negotiate compression parameters */ + probe->wKeyFrameRate = probe_min.wKeyFrameRate; + probe->wPFrameRate = probe_min.wPFrameRate; + probe->wCompQuality = probe_max.wCompQuality; + probe->wCompWindowSize = probe_min.wCompWindowSize; + } + +done: + mutex_unlock(&video->streaming->mutex); + return ret; +} + +/* ------------------------------------------------------------------------ + * Video codecs + */ + +/* Values for bmHeaderInfo (Video and Still Image Payload Headers, 2.4.3.3) */ +#define UVC_STREAM_EOH (1 << 7) +#define UVC_STREAM_ERR (1 << 6) +#define UVC_STREAM_STI (1 << 5) +#define UVC_STREAM_RES (1 << 4) +#define UVC_STREAM_SCR (1 << 3) +#define UVC_STREAM_PTS (1 << 2) +#define UVC_STREAM_EOF (1 << 1) +#define UVC_STREAM_FID (1 << 0) + +/* Video payload decoding is handled by uvc_video_decode_start(), + * uvc_video_decode_data() and uvc_video_decode_end(). + * + * uvc_video_decode_start is called with URB data at the start of a bulk or + * isochronous payload. It processes header data and returns the header size + * in bytes if successful. If an error occurs, it returns a negative error + * code. The following error codes have special meanings. + * + * - EAGAIN informs the caller that the current video buffer should be marked + * as done, and that the function should be called again with the same data + * and a new video buffer. This is used when end of frame conditions can be + * reliably detected at the beginning of the next frame only. + * + * If an error other than -EAGAIN is returned, the caller will drop the current + * payload. No call to uvc_video_decode_data and uvc_video_decode_end will be + * made until the next payload. -ENODATA can be used to drop the current + * payload if no other error code is appropriate. + * + * uvc_video_decode_data is called for every URB with URB data. It copies the + * data to the video buffer. + * + * uvc_video_decode_end is called with header data at the end of a bulk or + * isochronous payload. It performs any additional header data processing and + * returns 0 or a negative error code if an error occured. As header data have + * already been processed by uvc_video_decode_start, this functions isn't + * required to perform sanity checks a second time. + * + * For isochronous transfers where a payload is always transfered in a single + * URB, the three functions will be called in a row. + * + * To let the decoder process header data and update its internal state even + * when no video buffer is available, uvc_video_decode_start must be prepared + * to be called with a NULL buf parameter. uvc_video_decode_data and + * uvc_video_decode_end will never be called with a NULL buffer. + */ +static int uvc_video_decode_start(struct uvc_video_device *video, + struct uvc_buffer *buf, const __u8 *data, int len) +{ + __u8 fid; + + /* Sanity checks: + * - packet must be at least 2 bytes long + * - bHeaderLength value must be at least 2 bytes (see above) + * - bHeaderLength value can't be larger than the packet size. + */ + if (len < 2 || data[0] < 2 || data[0] > len) + return -EINVAL; + + /* Skip payloads marked with the error bit ("error frames"). */ + if (data[1] & UVC_STREAM_ERR) { + uvc_trace(UVC_TRACE_FRAME, "Dropping payload (error bit " + "set).\n"); + return -ENODATA; + } + + fid = data[1] & UVC_STREAM_FID; + + /* Store the payload FID bit and return immediately when the buffer is + * NULL. + */ + if (buf == NULL) { + video->last_fid = fid; + return -ENODATA; + } + + /* Synchronize to the input stream by waiting for the FID bit to be + * toggled when the the buffer state is not UVC_BUF_STATE_ACTIVE. + * queue->last_fid is initialized to -1, so the first isochronous + * frame will always be in sync. + * + * If the device doesn't toggle the FID bit, invert video->last_fid + * when the EOF bit is set to force synchronisation on the next packet. + */ + if (buf->state != UVC_BUF_STATE_ACTIVE) { + if (fid == video->last_fid) { + uvc_trace(UVC_TRACE_FRAME, "Dropping payload (out of " + "sync).\n"); + if ((video->dev->quirks & UVC_QUIRK_STREAM_NO_FID) && + (data[1] & UVC_STREAM_EOF)) + video->last_fid ^= UVC_STREAM_FID; + return -ENODATA; + } + + /* TODO: Handle PTS and SCR. */ + buf->state = UVC_BUF_STATE_ACTIVE; + } + + /* Mark the buffer as done if we're at the beginning of a new frame. + * End of frame detection is better implemented by checking the EOF + * bit (FID bit toggling is delayed by one frame compared to the EOF + * bit), but some devices don't set the bit at end of frame (and the + * last payload can be lost anyway). We thus must check if the FID has + * been toggled. + * + * queue->last_fid is initialized to -1, so the first isochronous + * frame will never trigger an end of frame detection. + * + * Empty buffers (bytesused == 0) don't trigger end of frame detection + * as it doesn't make sense to return an empty buffer. This also + * avoids detecting and of frame conditions at FID toggling if the + * previous payload had the EOF bit set. + */ + if (fid != video->last_fid && buf->buf.bytesused != 0) { + uvc_trace(UVC_TRACE_FRAME, "Frame complete (FID bit " + "toggled).\n"); + buf->state = UVC_BUF_STATE_DONE; + return -EAGAIN; + } + + video->last_fid = fid; + + return data[0]; +} + +static void uvc_video_decode_data(struct uvc_video_device *video, + struct uvc_buffer *buf, const __u8 *data, int len) +{ + struct uvc_video_queue *queue = &video->queue; + unsigned int maxlen, nbytes; + void *mem; + + if (len <= 0) + return; + + /* Copy the video data to the buffer. */ + maxlen = buf->buf.length - buf->buf.bytesused; + mem = queue->mem + buf->buf.m.offset + buf->buf.bytesused; + nbytes = min((unsigned int)len, maxlen); + memcpy(mem, data, nbytes); + buf->buf.bytesused += nbytes; + + /* Complete the current frame if the buffer size was exceeded. */ + if (len > maxlen) { + uvc_trace(UVC_TRACE_FRAME, "Frame complete (overflow).\n"); + buf->state = UVC_BUF_STATE_DONE; + } +} + +static void uvc_video_decode_end(struct uvc_video_device *video, + struct uvc_buffer *buf, const __u8 *data, int len) +{ + /* Mark the buffer as done if the EOF marker is set. */ + if (data[1] & UVC_STREAM_EOF && buf->buf.bytesused != 0) { + uvc_trace(UVC_TRACE_FRAME, "Frame complete (EOF found).\n"); + if (data[0] == len) + uvc_trace(UVC_TRACE_FRAME, "EOF in empty payload.\n"); + buf->state = UVC_BUF_STATE_DONE; + if (video->dev->quirks & UVC_QUIRK_STREAM_NO_FID) + video->last_fid ^= UVC_STREAM_FID; + } +} + +/* ------------------------------------------------------------------------ + * URB handling + */ + +/* + * Completion handler for video URBs. + */ +static void uvc_video_decode_isoc(struct urb *urb, + struct uvc_video_device *video, struct uvc_buffer *buf) +{ + u8 *mem; + int ret, i; + + for (i = 0; i < urb->number_of_packets; ++i) { + if (urb->iso_frame_desc[i].status < 0) { + uvc_trace(UVC_TRACE_FRAME, "USB isochronous frame " + "lost (%d).\n", urb->iso_frame_desc[i].status); + continue; + } + + /* Decode the payload header. */ + mem = urb->transfer_buffer + urb->iso_frame_desc[i].offset; + do { + ret = uvc_video_decode_start(video, buf, mem, + urb->iso_frame_desc[i].actual_length); + if (ret == -EAGAIN) + buf = uvc_queue_next_buffer(&video->queue, buf); + } while (ret == -EAGAIN); + + if (ret < 0) + continue; + + /* Decode the payload data. */ + uvc_video_decode_data(video, buf, mem + ret, + urb->iso_frame_desc[i].actual_length - ret); + + /* Process the header again. */ + uvc_video_decode_end(video, buf, mem, ret); + + if (buf->state == UVC_BUF_STATE_DONE || + buf->state == UVC_BUF_STATE_ERROR) + buf = uvc_queue_next_buffer(&video->queue, buf); + } +} + +static void uvc_video_decode_bulk(struct urb *urb, + struct uvc_video_device *video, struct uvc_buffer *buf) +{ + u8 *mem; + int len, ret; + + mem = urb->transfer_buffer; + len = urb->actual_length; + video->bulk.payload_size += len; + + /* If the URB is the first of its payload, decode and save the + * header. + */ + if (video->bulk.header_size == 0) { + do { + ret = uvc_video_decode_start(video, buf, mem, len); + if (ret == -EAGAIN) + buf = uvc_queue_next_buffer(&video->queue, buf); + } while (ret == -EAGAIN); + + /* If an error occured skip the rest of the payload. */ + if (ret < 0 || buf == NULL) { + video->bulk.skip_payload = 1; + return; + } + + video->bulk.header_size = ret; + memcpy(video->bulk.header, mem, video->bulk.header_size); + + mem += ret; + len -= ret; + } + + /* The buffer queue might have been cancelled while a bulk transfer + * was in progress, so we can reach here with buf equal to NULL. Make + * sure buf is never dereferenced if NULL. + */ + + /* Process video data. */ + if (!video->bulk.skip_payload && buf != NULL) + uvc_video_decode_data(video, buf, mem, len); + + /* Detect the payload end by a URB smaller than the maximum size (or + * a payload size equal to the maximum) and process the header again. + */ + if (urb->actual_length < urb->transfer_buffer_length || + video->bulk.payload_size >= video->bulk.max_payload_size) { + if (!video->bulk.skip_payload && buf != NULL) { + uvc_video_decode_end(video, buf, video->bulk.header, + video->bulk.header_size); + if (buf->state == UVC_BUF_STATE_DONE || + buf->state == UVC_BUF_STATE_ERROR) + buf = uvc_queue_next_buffer(&video->queue, buf); + } + + video->bulk.header_size = 0; + video->bulk.skip_payload = 0; + video->bulk.payload_size = 0; + } +} + +static void uvc_video_complete(struct urb *urb) +{ + struct uvc_video_device *video = urb->context; + struct uvc_video_queue *queue = &video->queue; + struct uvc_buffer *buf = NULL; + unsigned long flags; + int ret; + + switch (urb->status) { + case 0: + break; + + default: + uvc_printk(KERN_WARNING, "Non-zero status (%d) in video " + "completion handler.\n", urb->status); + + case -ENOENT: /* usb_kill_urb() called. */ + if (video->frozen) + return; + + case -ECONNRESET: /* usb_unlink_urb() called. */ + case -ESHUTDOWN: /* The endpoint is being disabled. */ + uvc_queue_cancel(queue, urb->status == -ESHUTDOWN); + return; + } + + spin_lock_irqsave(&queue->irqlock, flags); + if (!list_empty(&queue->irqqueue)) + buf = list_first_entry(&queue->irqqueue, struct uvc_buffer, + queue); + spin_unlock_irqrestore(&queue->irqlock, flags); + + video->decode(urb, video, buf); + + if ((ret = usb_submit_urb(urb, GFP_ATOMIC)) < 0) { + uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n", + ret); + } +} + +/* + * Uninitialize isochronous/bulk URBs and free transfer buffers. + */ +static void uvc_uninit_video(struct uvc_video_device *video) +{ + struct urb *urb; + unsigned int i; + + for (i = 0; i < UVC_URBS; ++i) { + if ((urb = video->urb[i]) == NULL) + continue; + + usb_kill_urb(urb); + /* urb->transfer_buffer_length is not touched by USB core, so + * we can use it here as the buffer length. + */ + if (video->urb_buffer[i]) { + usb_buffer_free(video->dev->udev, + urb->transfer_buffer_length, + video->urb_buffer[i], urb->transfer_dma); + video->urb_buffer[i] = NULL; + } + + usb_free_urb(urb); + video->urb[i] = NULL; + } +} + +/* + * Initialize isochronous URBs and allocate transfer buffers. The packet size + * is given by the endpoint. + */ +static int uvc_init_video_isoc(struct uvc_video_device *video, + struct usb_host_endpoint *ep) +{ + struct urb *urb; + unsigned int npackets, i, j; + __u16 psize; + __u32 size; + + /* Compute the number of isochronous packets to allocate by dividing + * the maximum video frame size by the packet size. Limit the result + * to UVC_MAX_ISO_PACKETS. + */ + psize = le16_to_cpu(ep->desc.wMaxPacketSize); + psize = (psize & 0x07ff) * (1 + ((psize >> 11) & 3)); + + size = video->streaming->ctrl.dwMaxVideoFrameSize; + if (size > UVC_MAX_FRAME_SIZE) + return -EINVAL; + + npackets = (size + psize - 1) / psize; + if (npackets > UVC_MAX_ISO_PACKETS) + npackets = UVC_MAX_ISO_PACKETS; + + size = npackets * psize; + + for (i = 0; i < UVC_URBS; ++i) { + urb = usb_alloc_urb(npackets, GFP_KERNEL); + if (urb == NULL) { + uvc_uninit_video(video); + return -ENOMEM; + } + + video->urb_buffer[i] = usb_buffer_alloc(video->dev->udev, + size, GFP_KERNEL, &urb->transfer_dma); + if (video->urb_buffer[i] == NULL) { + usb_free_urb(urb); + uvc_uninit_video(video); + return -ENOMEM; + } + + urb->dev = video->dev->udev; + urb->context = video; + urb->pipe = usb_rcvisocpipe(video->dev->udev, + ep->desc.bEndpointAddress); + urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; + urb->interval = ep->desc.bInterval; + urb->transfer_buffer = video->urb_buffer[i]; + urb->complete = uvc_video_complete; + urb->number_of_packets = npackets; + urb->transfer_buffer_length = size; + + for (j = 0; j < npackets; ++j) { + urb->iso_frame_desc[j].offset = j * psize; + urb->iso_frame_desc[j].length = psize; + } + + video->urb[i] = urb; + } + + return 0; +} + +/* + * Initialize bulk URBs and allocate transfer buffers. The packet size is + * given by the endpoint. + */ +static int uvc_init_video_bulk(struct uvc_video_device *video, + struct usb_host_endpoint *ep) +{ + struct urb *urb; + unsigned int pipe, i; + __u16 psize; + __u32 size; + + /* Compute the bulk URB size. Some devices set the maximum payload + * size to a value too high for memory-constrained devices. We must + * then transfer the payload accross multiple URBs. To be consistant + * with isochronous mode, allocate maximum UVC_MAX_ISO_PACKETS per bulk + * URB. + */ + psize = le16_to_cpu(ep->desc.wMaxPacketSize) & 0x07ff; + size = video->streaming->ctrl.dwMaxPayloadTransferSize; + video->bulk.max_payload_size = size; + if (size > psize * UVC_MAX_ISO_PACKETS) + size = psize * UVC_MAX_ISO_PACKETS; + + pipe = usb_rcvbulkpipe(video->dev->udev, ep->desc.bEndpointAddress); + + for (i = 0; i < UVC_URBS; ++i) { + urb = usb_alloc_urb(0, GFP_KERNEL); + if (urb == NULL) { + uvc_uninit_video(video); + return -ENOMEM; + } + + video->urb_buffer[i] = usb_buffer_alloc(video->dev->udev, + size, GFP_KERNEL, &urb->transfer_dma); + if (video->urb_buffer[i] == NULL) { + usb_free_urb(urb); + uvc_uninit_video(video); + return -ENOMEM; + } + + usb_fill_bulk_urb(urb, video->dev->udev, pipe, + video->urb_buffer[i], size, uvc_video_complete, + video); + urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; + + video->urb[i] = urb; + } + + return 0; +} + +/* + * Initialize isochronous/bulk URBs and allocate transfer buffers. + */ +static int uvc_init_video(struct uvc_video_device *video) +{ + struct usb_interface *intf = video->streaming->intf; + struct usb_host_interface *alts; + struct usb_host_endpoint *ep = NULL; + int intfnum = video->streaming->intfnum; + unsigned int bandwidth, psize, i; + int ret; + + video->last_fid = -1; + video->bulk.header_size = 0; + video->bulk.skip_payload = 0; + video->bulk.payload_size = 0; + + if (intf->num_altsetting > 1) { + /* Isochronous endpoint, select the alternate setting. */ + bandwidth = video->streaming->ctrl.dwMaxPayloadTransferSize; + + if (bandwidth == 0) { + uvc_printk(KERN_WARNING, "device %s requested null " + "bandwidth, defaulting to lowest.\n", + video->vdev->name); + bandwidth = 1; + } + + for (i = 0; i < intf->num_altsetting; ++i) { + alts = &intf->altsetting[i]; + ep = uvc_find_endpoint(alts, + video->streaming->header.bEndpointAddress); + if (ep == NULL) + continue; + + /* Check if the bandwidth is high enough. */ + psize = le16_to_cpu(ep->desc.wMaxPacketSize); + psize = (psize & 0x07ff) * (1 + ((psize >> 11) & 3)); + if (psize >= bandwidth) + break; + } + + if (i >= intf->num_altsetting) + return -EIO; + + if ((ret = usb_set_interface(video->dev->udev, intfnum, i)) < 0) + return ret; + + ret = uvc_init_video_isoc(video, ep); + } else { + /* Bulk endpoint, proceed to URB initialization. */ + ep = uvc_find_endpoint(&intf->altsetting[0], + video->streaming->header.bEndpointAddress); + if (ep == NULL) + return -EIO; + + ret = uvc_init_video_bulk(video, ep); + } + + if (ret < 0) + return ret; + + /* Submit the URBs. */ + for (i = 0; i < UVC_URBS; ++i) { + if ((ret = usb_submit_urb(video->urb[i], GFP_KERNEL)) < 0) { + uvc_printk(KERN_ERR, "Failed to submit URB %u " + "(%d).\n", i, ret); + uvc_uninit_video(video); + return ret; + } + } + + return 0; +} + +/* -------------------------------------------------------------------------- + * Suspend/resume + */ + +/* + * Stop streaming without disabling the video queue. + * + * To let userspace applications resume without trouble, we must not touch the + * video buffers in any way. We mark the device as frozen to make sure the URB + * completion handler won't try to cancel the queue when we kill the URBs. + */ +int uvc_video_suspend(struct uvc_video_device *video) +{ + if (!uvc_queue_streaming(&video->queue)) + return 0; + + video->frozen = 1; + uvc_uninit_video(video); + usb_set_interface(video->dev->udev, video->streaming->intfnum, 0); + return 0; +} + +/* + * Reconfigure the video interface and restart streaming if it was enable + * before suspend. + * + * If an error occurs, disable the video queue. This will wake all pending + * buffers, making sure userspace applications are notified of the problem + * instead of waiting forever. + */ +int uvc_video_resume(struct uvc_video_device *video) +{ + int ret; + + video->frozen = 0; + + if ((ret = uvc_set_video_ctrl(video, &video->streaming->ctrl, 0)) < 0) { + uvc_queue_enable(&video->queue, 0); + return ret; + } + + if (!uvc_queue_streaming(&video->queue)) + return 0; + + if ((ret = uvc_init_video(video)) < 0) + uvc_queue_enable(&video->queue, 0); + + return ret; +} + +/* ------------------------------------------------------------------------ + * Video device + */ + +/* + * Initialize the UVC video device by retrieving the default format and + * committing it. + * + * Some cameras (namely the Fuji Finepix) set the format and frame + * indexes to zero. The UVC standard doesn't clearly make this a spec + * violation, so try to silently fix the values if possible. + * + * This function is called before registering the device with V4L. + */ +int uvc_video_init(struct uvc_video_device *video) +{ + struct uvc_streaming_control *probe = &video->streaming->ctrl; + struct uvc_format *format = NULL; + struct uvc_frame *frame = NULL; + unsigned int i; + int ret; + + if (video->streaming->nformats == 0) { + uvc_printk(KERN_INFO, "No supported video formats found.\n"); + return -EINVAL; + } + + /* Alternate setting 0 should be the default, yet the XBox Live Vision + * Cam (and possibly other devices) crash or otherwise misbehave if + * they don't receive a SET_INTERFACE request before any other video + * control request. + */ + usb_set_interface(video->dev->udev, video->streaming->intfnum, 0); + + /* Some webcams don't suport GET_DEF request on the probe control. We + * fall back to GET_CUR if GET_DEF fails. + */ + if ((ret = uvc_get_video_ctrl(video, probe, 1, GET_DEF)) < 0 && + (ret = uvc_get_video_ctrl(video, probe, 1, GET_CUR)) < 0) + return ret; + + /* Check if the default format descriptor exists. Use the first + * available format otherwise. + */ + for (i = video->streaming->nformats; i > 0; --i) { + format = &video->streaming->format[i-1]; + if (format->index == probe->bFormatIndex) + break; + } + + if (format->nframes == 0) { + uvc_printk(KERN_INFO, "No frame descriptor found for the " + "default format.\n"); + return -EINVAL; + } + + /* Zero bFrameIndex might be correct. Stream-based formats (including + * MPEG-2 TS and DV) do not support frames but have a dummy frame + * descriptor with bFrameIndex set to zero. If the default frame + * descriptor is not found, use the first avalable frame. + */ + for (i = format->nframes; i > 0; --i) { + frame = &format->frame[i-1]; + if (frame->bFrameIndex == probe->bFrameIndex) + break; + } + + /* Commit the default settings. */ + probe->bFormatIndex = format->index; + probe->bFrameIndex = frame->bFrameIndex; + if ((ret = uvc_set_video_ctrl(video, probe, 0)) < 0) + return ret; + + video->streaming->cur_format = format; + video->streaming->cur_frame = frame; + atomic_set(&video->active, 0); + + /* Select the video decoding function */ + if (video->dev->quirks & UVC_QUIRK_BUILTIN_ISIGHT) + video->decode = uvc_video_decode_isight; + else if (video->streaming->intf->num_altsetting > 1) + video->decode = uvc_video_decode_isoc; + else + video->decode = uvc_video_decode_bulk; + + return 0; +} + +/* + * Enable or disable the video stream. + */ +int uvc_video_enable(struct uvc_video_device *video, int enable) +{ + int ret; + + if (!enable) { + uvc_uninit_video(video); + usb_set_interface(video->dev->udev, + video->streaming->intfnum, 0); + uvc_queue_enable(&video->queue, 0); + return 0; + } + + if ((ret = uvc_queue_enable(&video->queue, 1)) < 0) + return ret; + + return uvc_init_video(video); +} diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h new file mode 100644 index 000000000000..a995a780db1c --- /dev/null +++ b/drivers/media/video/uvc/uvcvideo.h @@ -0,0 +1,796 @@ +#ifndef _USB_VIDEO_H_ +#define _USB_VIDEO_H_ + +#include <linux/kernel.h> +#include <linux/videodev2.h> + + +/* + * Dynamic controls + */ + +/* Data types for UVC control data */ +#define UVC_CTRL_DATA_TYPE_RAW 0 +#define UVC_CTRL_DATA_TYPE_SIGNED 1 +#define UVC_CTRL_DATA_TYPE_UNSIGNED 2 +#define UVC_CTRL_DATA_TYPE_BOOLEAN 3 +#define UVC_CTRL_DATA_TYPE_ENUM 4 +#define UVC_CTRL_DATA_TYPE_BITMASK 5 + +/* Control flags */ +#define UVC_CONTROL_SET_CUR (1 << 0) +#define UVC_CONTROL_GET_CUR (1 << 1) +#define UVC_CONTROL_GET_MIN (1 << 2) +#define UVC_CONTROL_GET_MAX (1 << 3) +#define UVC_CONTROL_GET_RES (1 << 4) +#define UVC_CONTROL_GET_DEF (1 << 5) +/* Control should be saved at suspend and restored at resume. */ +#define UVC_CONTROL_RESTORE (1 << 6) +/* Control can be updated by the camera. */ +#define UVC_CONTROL_AUTO_UPDATE (1 << 7) + +#define UVC_CONTROL_GET_RANGE (UVC_CONTROL_GET_CUR | UVC_CONTROL_GET_MIN | \ + UVC_CONTROL_GET_MAX | UVC_CONTROL_GET_RES | \ + UVC_CONTROL_GET_DEF) + +struct uvc_xu_control_info { + __u8 entity[16]; + __u8 index; + __u8 selector; + __u16 size; + __u32 flags; +}; + +struct uvc_xu_control_mapping { + __u32 id; + __u8 name[32]; + __u8 entity[16]; + __u8 selector; + + __u8 size; + __u8 offset; + enum v4l2_ctrl_type v4l2_type; + __u32 data_type; +}; + +struct uvc_xu_control { + __u8 unit; + __u8 selector; + __u16 size; + __u8 __user *data; +}; + +#define UVCIOC_CTRL_ADD _IOW('U', 1, struct uvc_xu_control_info) +#define UVCIOC_CTRL_MAP _IOWR('U', 2, struct uvc_xu_control_mapping) +#define UVCIOC_CTRL_GET _IOWR('U', 3, struct uvc_xu_control) +#define UVCIOC_CTRL_SET _IOW('U', 4, struct uvc_xu_control) + +#ifdef __KERNEL__ + +#include <linux/poll.h> + +/* -------------------------------------------------------------------------- + * UVC constants + */ + +#define SC_UNDEFINED 0x00 +#define SC_VIDEOCONTROL 0x01 +#define SC_VIDEOSTREAMING 0x02 +#define SC_VIDEO_INTERFACE_COLLECTION 0x03 + +#define PC_PROTOCOL_UNDEFINED 0x00 + +#define CS_UNDEFINED 0x20 +#define CS_DEVICE 0x21 +#define CS_CONFIGURATION 0x22 +#define CS_STRING 0x23 +#define CS_INTERFACE 0x24 +#define CS_ENDPOINT 0x25 + +/* VideoControl class specific interface descriptor */ +#define VC_DESCRIPTOR_UNDEFINED 0x00 +#define VC_HEADER 0x01 +#define VC_INPUT_TERMINAL 0x02 +#define VC_OUTPUT_TERMINAL 0x03 +#define VC_SELECTOR_UNIT 0x04 +#define VC_PROCESSING_UNIT 0x05 +#define VC_EXTENSION_UNIT 0x06 + +/* VideoStreaming class specific interface descriptor */ +#define VS_UNDEFINED 0x00 +#define VS_INPUT_HEADER 0x01 +#define VS_OUTPUT_HEADER 0x02 +#define VS_STILL_IMAGE_FRAME 0x03 +#define VS_FORMAT_UNCOMPRESSED 0x04 +#define VS_FRAME_UNCOMPRESSED 0x05 +#define VS_FORMAT_MJPEG 0x06 +#define VS_FRAME_MJPEG 0x07 +#define VS_FORMAT_MPEG2TS 0x0a +#define VS_FORMAT_DV 0x0c +#define VS_COLORFORMAT 0x0d +#define VS_FORMAT_FRAME_BASED 0x10 +#define VS_FRAME_FRAME_BASED 0x11 +#define VS_FORMAT_STREAM_BASED 0x12 + +/* Endpoint type */ +#define EP_UNDEFINED 0x00 +#define EP_GENERAL 0x01 +#define EP_ENDPOINT 0x02 +#define EP_INTERRUPT 0x03 + +/* Request codes */ +#define RC_UNDEFINED 0x00 +#define SET_CUR 0x01 +#define GET_CUR 0x81 +#define GET_MIN 0x82 +#define GET_MAX 0x83 +#define GET_RES 0x84 +#define GET_LEN 0x85 +#define GET_INFO 0x86 +#define GET_DEF 0x87 + +/* VideoControl interface controls */ +#define VC_CONTROL_UNDEFINED 0x00 +#define VC_VIDEO_POWER_MODE_CONTROL 0x01 +#define VC_REQUEST_ERROR_CODE_CONTROL 0x02 + +/* Terminal controls */ +#define TE_CONTROL_UNDEFINED 0x00 + +/* Selector Unit controls */ +#define SU_CONTROL_UNDEFINED 0x00 +#define SU_INPUT_SELECT_CONTROL 0x01 + +/* Camera Terminal controls */ +#define CT_CONTROL_UNDEFINED 0x00 +#define CT_SCANNING_MODE_CONTROL 0x01 +#define CT_AE_MODE_CONTROL 0x02 +#define CT_AE_PRIORITY_CONTROL 0x03 +#define CT_EXPOSURE_TIME_ABSOLUTE_CONTROL 0x04 +#define CT_EXPOSURE_TIME_RELATIVE_CONTROL 0x05 +#define CT_FOCUS_ABSOLUTE_CONTROL 0x06 +#define CT_FOCUS_RELATIVE_CONTROL 0x07 +#define CT_FOCUS_AUTO_CONTROL 0x08 +#define CT_IRIS_ABSOLUTE_CONTROL 0x09 +#define CT_IRIS_RELATIVE_CONTROL 0x0a +#define CT_ZOOM_ABSOLUTE_CONTROL 0x0b +#define CT_ZOOM_RELATIVE_CONTROL 0x0c +#define CT_PANTILT_ABSOLUTE_CONTROL 0x0d +#define CT_PANTILT_RELATIVE_CONTROL 0x0e +#define CT_ROLL_ABSOLUTE_CONTROL 0x0f +#define CT_ROLL_RELATIVE_CONTROL 0x10 +#define CT_PRIVACY_CONTROL 0x11 + +/* Processing Unit controls */ +#define PU_CONTROL_UNDEFINED 0x00 +#define PU_BACKLIGHT_COMPENSATION_CONTROL 0x01 +#define PU_BRIGHTNESS_CONTROL 0x02 +#define PU_CONTRAST_CONTROL 0x03 +#define PU_GAIN_CONTROL 0x04 +#define PU_POWER_LINE_FREQUENCY_CONTROL 0x05 +#define PU_HUE_CONTROL 0x06 +#define PU_SATURATION_CONTROL 0x07 +#define PU_SHARPNESS_CONTROL 0x08 +#define PU_GAMMA_CONTROL 0x09 +#define PU_WHITE_BALANCE_TEMPERATURE_CONTROL 0x0a +#define PU_WHITE_BALANCE_TEMPERATURE_AUTO_CONTROL 0x0b +#define PU_WHITE_BALANCE_COMPONENT_CONTROL 0x0c +#define PU_WHITE_BALANCE_COMPONENT_AUTO_CONTROL 0x0d +#define PU_DIGITAL_MULTIPLIER_CONTROL 0x0e +#define PU_DIGITAL_MULTIPLIER_LIMIT_CONTROL 0x0f +#define PU_HUE_AUTO_CONTROL 0x10 +#define PU_ANALOG_VIDEO_STANDARD_CONTROL 0x11 +#define PU_ANALOG_LOCK_STATUS_CONTROL 0x12 + +#define LXU_MOTOR_PANTILT_RELATIVE_CONTROL 0x01 +#define LXU_MOTOR_PANTILT_RESET_CONTROL 0x02 +#define LXU_MOTOR_FOCUS_MOTOR_CONTROL 0x03 + +/* VideoStreaming interface controls */ +#define VS_CONTROL_UNDEFINED 0x00 +#define VS_PROBE_CONTROL 0x01 +#define VS_COMMIT_CONTROL 0x02 +#define VS_STILL_PROBE_CONTROL 0x03 +#define VS_STILL_COMMIT_CONTROL 0x04 +#define VS_STILL_IMAGE_TRIGGER_CONTROL 0x05 +#define VS_STREAM_ERROR_CODE_CONTROL 0x06 +#define VS_GENERATE_KEY_FRAME_CONTROL 0x07 +#define VS_UPDATE_FRAME_SEGMENT_CONTROL 0x08 +#define VS_SYNC_DELAY_CONTROL 0x09 + +#define TT_VENDOR_SPECIFIC 0x0100 +#define TT_STREAMING 0x0101 + +/* Input Terminal types */ +#define ITT_VENDOR_SPECIFIC 0x0200 +#define ITT_CAMERA 0x0201 +#define ITT_MEDIA_TRANSPORT_INPUT 0x0202 + +/* Output Terminal types */ +#define OTT_VENDOR_SPECIFIC 0x0300 +#define OTT_DISPLAY 0x0301 +#define OTT_MEDIA_TRANSPORT_OUTPUT 0x0302 + +/* External Terminal types */ +#define EXTERNAL_VENDOR_SPECIFIC 0x0400 +#define COMPOSITE_CONNECTOR 0x0401 +#define SVIDEO_CONNECTOR 0x0402 +#define COMPONENT_CONNECTOR 0x0403 + +#define UVC_TERM_INPUT 0x0000 +#define UVC_TERM_OUTPUT 0x8000 + +#define UVC_ENTITY_TYPE(entity) ((entity)->type & 0x7fff) +#define UVC_ENTITY_IS_UNIT(entity) (((entity)->type & 0xff00) == 0) +#define UVC_ENTITY_IS_TERM(entity) (((entity)->type & 0xff00) != 0) +#define UVC_ENTITY_IS_ITERM(entity) \ + (((entity)->type & 0x8000) == UVC_TERM_INPUT) +#define UVC_ENTITY_IS_OTERM(entity) \ + (((entity)->type & 0x8000) == UVC_TERM_OUTPUT) + +#define UVC_STATUS_TYPE_CONTROL 1 +#define UVC_STATUS_TYPE_STREAMING 2 + +/* ------------------------------------------------------------------------ + * GUIDs + */ +#define UVC_GUID_UVC_CAMERA \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} +#define UVC_GUID_UVC_OUTPUT \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02} +#define UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} +#define UVC_GUID_UVC_PROCESSING \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01} +#define UVC_GUID_UVC_SELECTOR \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02} + +#define UVC_GUID_LOGITECH_DEV_INFO \ + {0x82, 0x06, 0x61, 0x63, 0x70, 0x50, 0xab, 0x49, \ + 0xb8, 0xcc, 0xb3, 0x85, 0x5e, 0x8d, 0x22, 0x1e} +#define UVC_GUID_LOGITECH_USER_HW \ + {0x82, 0x06, 0x61, 0x63, 0x70, 0x50, 0xab, 0x49, \ + 0xb8, 0xcc, 0xb3, 0x85, 0x5e, 0x8d, 0x22, 0x1f} +#define UVC_GUID_LOGITECH_VIDEO \ + {0x82, 0x06, 0x61, 0x63, 0x70, 0x50, 0xab, 0x49, \ + 0xb8, 0xcc, 0xb3, 0x85, 0x5e, 0x8d, 0x22, 0x50} +#define UVC_GUID_LOGITECH_MOTOR \ + {0x82, 0x06, 0x61, 0x63, 0x70, 0x50, 0xab, 0x49, \ + 0xb8, 0xcc, 0xb3, 0x85, 0x5e, 0x8d, 0x22, 0x56} + +#define UVC_GUID_FORMAT_MJPEG \ + { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YUY2 \ + { 'Y', 'U', 'Y', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_NV12 \ + { 'N', 'V', '1', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YV12 \ + { 'Y', 'V', '1', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_I420 \ + { 'I', '4', '2', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_UYVY \ + { 'U', 'Y', 'V', 'Y', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y800 \ + { 'Y', '8', '0', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BY8 \ + { 'B', 'Y', '8', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + + +/* ------------------------------------------------------------------------ + * Driver specific constants. + */ + +#define DRIVER_VERSION_NUMBER KERNEL_VERSION(0, 1, 0) + +/* Number of isochronous URBs. */ +#define UVC_URBS 5 +/* Maximum number of packets per isochronous URB. */ +#define UVC_MAX_ISO_PACKETS 40 +/* Maximum frame size in bytes, for sanity checking. */ +#define UVC_MAX_FRAME_SIZE (16*1024*1024) +/* Maximum number of video buffers. */ +#define UVC_MAX_VIDEO_BUFFERS 32 + +#define UVC_CTRL_CONTROL_TIMEOUT 300 +#define UVC_CTRL_STREAMING_TIMEOUT 1000 + +/* Devices quirks */ +#define UVC_QUIRK_STATUS_INTERVAL 0x00000001 +#define UVC_QUIRK_PROBE_MINMAX 0x00000002 +#define UVC_QUIRK_PROBE_EXTRAFIELDS 0x00000004 +#define UVC_QUIRK_BUILTIN_ISIGHT 0x00000008 +#define UVC_QUIRK_STREAM_NO_FID 0x00000010 +#define UVC_QUIRK_IGNORE_SELECTOR_UNIT 0x00000020 + +/* Format flags */ +#define UVC_FMT_FLAG_COMPRESSED 0x00000001 +#define UVC_FMT_FLAG_STREAM 0x00000002 + +/* ------------------------------------------------------------------------ + * Structures. + */ + +struct uvc_device; + +/* TODO: Put the most frequently accessed fields at the beginning of + * structures to maximize cache efficiency. + */ +struct uvc_streaming_control { + __u16 bmHint; + __u8 bFormatIndex; + __u8 bFrameIndex; + __u32 dwFrameInterval; + __u16 wKeyFrameRate; + __u16 wPFrameRate; + __u16 wCompQuality; + __u16 wCompWindowSize; + __u16 wDelay; + __u32 dwMaxVideoFrameSize; + __u32 dwMaxPayloadTransferSize; + __u32 dwClockFrequency; + __u8 bmFramingInfo; + __u8 bPreferedVersion; + __u8 bMinVersion; + __u8 bMaxVersion; +}; + +struct uvc_menu_info { + __u32 value; + __u8 name[32]; +}; + +struct uvc_control_info { + struct list_head list; + struct list_head mappings; + + __u8 entity[16]; + __u8 index; + __u8 selector; + + __u16 size; + __u32 flags; +}; + +struct uvc_control_mapping { + struct list_head list; + + struct uvc_control_info *ctrl; + + __u32 id; + __u8 name[32]; + __u8 entity[16]; + __u8 selector; + + __u8 size; + __u8 offset; + enum v4l2_ctrl_type v4l2_type; + __u32 data_type; + + struct uvc_menu_info *menu_info; + __u32 menu_count; +}; + +struct uvc_control { + struct uvc_entity *entity; + struct uvc_control_info *info; + + __u8 index; /* Used to match the uvc_control entry with a + uvc_control_info. */ + __u8 dirty : 1, + loaded : 1, + modified : 1; + + __u8 *data; +}; + +struct uvc_format_desc { + char *name; + __u8 guid[16]; + __u32 fcc; +}; + +/* The term 'entity' refers to both UVC units and UVC terminals. + * + * The type field is either the terminal type (wTerminalType in the terminal + * descriptor), or the unit type (bDescriptorSubtype in the unit descriptor). + * As the bDescriptorSubtype field is one byte long, the type value will + * always have a null MSB for units. All terminal types defined by the UVC + * specification have a non-null MSB, so it is safe to use the MSB to + * differentiate between units and terminals as long as the descriptor parsing + * code makes sure terminal types have a non-null MSB. + * + * For terminals, the type's most significant bit stores the terminal + * direction (either UVC_TERM_INPUT or UVC_TERM_OUTPUT). The type field should + * always be accessed with the UVC_ENTITY_* macros and never directly. + */ + +struct uvc_entity { + struct list_head list; /* Entity as part of a UVC device. */ + struct list_head chain; /* Entity as part of a video device + * chain. */ + __u8 id; + __u16 type; + char name[64]; + + union { + struct { + __u16 wObjectiveFocalLengthMin; + __u16 wObjectiveFocalLengthMax; + __u16 wOcularFocalLength; + __u8 bControlSize; + __u8 *bmControls; + } camera; + + struct { + __u8 bControlSize; + __u8 *bmControls; + __u8 bTransportModeSize; + __u8 *bmTransportModes; + } media; + + struct { + __u8 bSourceID; + } output; + + struct { + __u8 bSourceID; + __u16 wMaxMultiplier; + __u8 bControlSize; + __u8 *bmControls; + __u8 bmVideoStandards; + } processing; + + struct { + __u8 bNrInPins; + __u8 *baSourceID; + } selector; + + struct { + __u8 guidExtensionCode[16]; + __u8 bNumControls; + __u8 bNrInPins; + __u8 *baSourceID; + __u8 bControlSize; + __u8 *bmControls; + __u8 *bmControlsType; + } extension; + }; + + unsigned int ncontrols; + struct uvc_control *controls; +}; + +struct uvc_frame { + __u8 bFrameIndex; + __u8 bmCapabilities; + __u16 wWidth; + __u16 wHeight; + __u32 dwMinBitRate; + __u32 dwMaxBitRate; + __u32 dwMaxVideoFrameBufferSize; + __u8 bFrameIntervalType; + __u32 dwDefaultFrameInterval; + __u32 *dwFrameInterval; +}; + +struct uvc_format { + __u8 type; + __u8 index; + __u8 bpp; + __u8 colorspace; + __u32 fcc; + __u32 flags; + + char name[32]; + + unsigned int nframes; + struct uvc_frame *frame; +}; + +struct uvc_streaming_header { + __u8 bNumFormats; + __u8 bEndpointAddress; + __u8 bTerminalLink; + __u8 bControlSize; + __u8 *bmaControls; + /* The following fields are used by input headers only. */ + __u8 bmInfo; + __u8 bStillCaptureMethod; + __u8 bTriggerSupport; + __u8 bTriggerUsage; +}; + +struct uvc_streaming { + struct list_head list; + + struct usb_interface *intf; + int intfnum; + __u16 maxpsize; + + struct uvc_streaming_header header; + + unsigned int nformats; + struct uvc_format *format; + + struct uvc_streaming_control ctrl; + struct uvc_format *cur_format; + struct uvc_frame *cur_frame; + + struct mutex mutex; +}; + +enum uvc_buffer_state { + UVC_BUF_STATE_IDLE = 0, + UVC_BUF_STATE_QUEUED = 1, + UVC_BUF_STATE_ACTIVE = 2, + UVC_BUF_STATE_DONE = 3, + UVC_BUF_STATE_ERROR = 4, +}; + +struct uvc_buffer { + unsigned long vma_use_count; + struct list_head stream; + + /* Touched by interrupt handler. */ + struct v4l2_buffer buf; + struct list_head queue; + wait_queue_head_t wait; + enum uvc_buffer_state state; +}; + +#define UVC_QUEUE_STREAMING (1 << 0) +#define UVC_QUEUE_DISCONNECTED (1 << 1) +#define UVC_QUEUE_DROP_INCOMPLETE (1 << 2) + +struct uvc_video_queue { + void *mem; + unsigned int flags; + __u32 sequence; + + unsigned int count; + unsigned int buf_size; + struct uvc_buffer buffer[UVC_MAX_VIDEO_BUFFERS]; + struct mutex mutex; /* protects buffers and mainqueue */ + spinlock_t irqlock; /* protects irqqueue */ + + struct list_head mainqueue; + struct list_head irqqueue; +}; + +struct uvc_video_device { + struct uvc_device *dev; + struct video_device *vdev; + atomic_t active; + unsigned int frozen : 1; + + struct list_head iterms; + struct uvc_entity *oterm; + struct uvc_entity *processing; + struct uvc_entity *selector; + struct list_head extensions; + struct mutex ctrl_mutex; + + struct uvc_video_queue queue; + + /* Video streaming object, must always be non-NULL. */ + struct uvc_streaming *streaming; + + void (*decode) (struct urb *urb, struct uvc_video_device *video, + struct uvc_buffer *buf); + + /* Context data used by the bulk completion handler. */ + struct { + __u8 header[256]; + unsigned int header_size; + int skip_payload; + __u32 payload_size; + __u32 max_payload_size; + } bulk; + + struct urb *urb[UVC_URBS]; + char *urb_buffer[UVC_URBS]; + + __u8 last_fid; +}; + +enum uvc_device_state { + UVC_DEV_DISCONNECTED = 1, +}; + +struct uvc_device { + struct usb_device *udev; + struct usb_interface *intf; + __u32 quirks; + int intfnum; + char name[32]; + + enum uvc_device_state state; + struct kref kref; + struct list_head list; + + /* Video control interface */ + __u16 uvc_version; + __u32 clock_frequency; + + struct list_head entities; + + struct uvc_video_device video; + + /* Status Interrupt Endpoint */ + struct usb_host_endpoint *int_ep; + struct urb *int_urb; + __u8 status[16]; + struct input_dev *input; + + /* Video Streaming interfaces */ + struct list_head streaming; +}; + +enum uvc_handle_state { + UVC_HANDLE_PASSIVE = 0, + UVC_HANDLE_ACTIVE = 1, +}; + +struct uvc_fh { + struct uvc_video_device *device; + enum uvc_handle_state state; +}; + +struct uvc_driver { + struct usb_driver driver; + + struct mutex open_mutex; /* protects from open/disconnect race */ + + struct list_head devices; /* struct uvc_device list */ + struct list_head controls; /* struct uvc_control_info list */ + struct mutex ctrl_mutex; /* protects controls and devices + lists */ +}; + +/* ------------------------------------------------------------------------ + * Debugging, printing and logging + */ + +#define UVC_TRACE_PROBE (1 << 0) +#define UVC_TRACE_DESCR (1 << 1) +#define UVC_TRACE_CONTROL (1 << 2) +#define UVC_TRACE_FORMAT (1 << 3) +#define UVC_TRACE_CAPTURE (1 << 4) +#define UVC_TRACE_CALLS (1 << 5) +#define UVC_TRACE_IOCTL (1 << 6) +#define UVC_TRACE_FRAME (1 << 7) +#define UVC_TRACE_SUSPEND (1 << 8) +#define UVC_TRACE_STATUS (1 << 9) + +extern unsigned int uvc_trace_param; + +#define uvc_trace(flag, msg...) \ + do { \ + if (uvc_trace_param & flag) \ + printk(KERN_DEBUG "uvcvideo: " msg); \ + } while (0) + +#define uvc_printk(level, msg...) \ + printk(level "uvcvideo: " msg) + +#define UVC_GUID_FORMAT "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" \ + "%02x%02x%02x%02x%02x%02x" +#define UVC_GUID_ARGS(guid) \ + (guid)[3], (guid)[2], (guid)[1], (guid)[0], \ + (guid)[5], (guid)[4], \ + (guid)[7], (guid)[6], \ + (guid)[8], (guid)[9], \ + (guid)[10], (guid)[11], (guid)[12], \ + (guid)[13], (guid)[14], (guid)[15] + +/* -------------------------------------------------------------------------- + * Internal functions. + */ + +/* Core driver */ +extern struct uvc_driver uvc_driver; +extern void uvc_delete(struct kref *kref); + +/* Video buffers queue management. */ +extern void uvc_queue_init(struct uvc_video_queue *queue); +extern int uvc_alloc_buffers(struct uvc_video_queue *queue, + unsigned int nbuffers, unsigned int buflength); +extern int uvc_free_buffers(struct uvc_video_queue *queue); +extern int uvc_query_buffer(struct uvc_video_queue *queue, + struct v4l2_buffer *v4l2_buf); +extern int uvc_queue_buffer(struct uvc_video_queue *queue, + struct v4l2_buffer *v4l2_buf); +extern int uvc_dequeue_buffer(struct uvc_video_queue *queue, + struct v4l2_buffer *v4l2_buf, int nonblocking); +extern int uvc_queue_enable(struct uvc_video_queue *queue, int enable); +extern void uvc_queue_cancel(struct uvc_video_queue *queue, int disconnect); +extern struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue, + struct uvc_buffer *buf); +extern unsigned int uvc_queue_poll(struct uvc_video_queue *queue, + struct file *file, poll_table *wait); +static inline int uvc_queue_streaming(struct uvc_video_queue *queue) +{ + return queue->flags & UVC_QUEUE_STREAMING; +} + +/* V4L2 interface */ +extern struct file_operations uvc_fops; + +/* Video */ +extern int uvc_video_init(struct uvc_video_device *video); +extern int uvc_video_suspend(struct uvc_video_device *video); +extern int uvc_video_resume(struct uvc_video_device *video); +extern int uvc_video_enable(struct uvc_video_device *video, int enable); +extern int uvc_probe_video(struct uvc_video_device *video, + struct uvc_streaming_control *probe); +extern int uvc_query_ctrl(struct uvc_device *dev, __u8 query, __u8 unit, + __u8 intfnum, __u8 cs, void *data, __u16 size); +extern int uvc_set_video_ctrl(struct uvc_video_device *video, + struct uvc_streaming_control *ctrl, int probe); + +/* Status */ +extern int uvc_status_init(struct uvc_device *dev); +extern void uvc_status_cleanup(struct uvc_device *dev); +extern int uvc_status_suspend(struct uvc_device *dev); +extern int uvc_status_resume(struct uvc_device *dev); + +/* Controls */ +extern struct uvc_control *uvc_find_control(struct uvc_video_device *video, + __u32 v4l2_id, struct uvc_control_mapping **mapping); +extern int uvc_query_v4l2_ctrl(struct uvc_video_device *video, + struct v4l2_queryctrl *v4l2_ctrl); + +extern int uvc_ctrl_add_info(struct uvc_control_info *info); +extern int uvc_ctrl_add_mapping(struct uvc_control_mapping *mapping); +extern int uvc_ctrl_init_device(struct uvc_device *dev); +extern void uvc_ctrl_cleanup_device(struct uvc_device *dev); +extern int uvc_ctrl_resume_device(struct uvc_device *dev); +extern void uvc_ctrl_init(void); + +extern int uvc_ctrl_begin(struct uvc_video_device *video); +extern int __uvc_ctrl_commit(struct uvc_video_device *video, int rollback); +static inline int uvc_ctrl_commit(struct uvc_video_device *video) +{ + return __uvc_ctrl_commit(video, 0); +} +static inline int uvc_ctrl_rollback(struct uvc_video_device *video) +{ + return __uvc_ctrl_commit(video, 1); +} + +extern int uvc_ctrl_get(struct uvc_video_device *video, + struct v4l2_ext_control *xctrl); +extern int uvc_ctrl_set(struct uvc_video_device *video, + struct v4l2_ext_control *xctrl); + +extern int uvc_xu_ctrl_query(struct uvc_video_device *video, + struct uvc_xu_control *ctrl, int set); + +/* Utility functions */ +extern void uvc_simplify_fraction(uint32_t *numerator, uint32_t *denominator, + unsigned int n_terms, unsigned int threshold); +extern uint32_t uvc_fraction_to_interval(uint32_t numerator, + uint32_t denominator); +extern struct usb_host_endpoint *uvc_find_endpoint( + struct usb_host_interface *alts, __u8 epaddr); + +/* Quirks support */ +void uvc_video_decode_isight(struct urb *urb, struct uvc_video_device *video, + struct uvc_buffer *buf); + +#endif /* __KERNEL__ */ + +#endif diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c index 31e8af0ba278..67a661cf5219 100644 --- a/drivers/media/video/videodev.c +++ b/drivers/media/video/videodev.c @@ -51,12 +51,51 @@ #define VIDEO_NUM_DEVICES 256 #define VIDEO_NAME "video4linux" +struct std_descr { + v4l2_std_id std; + const char *descr; +}; + +static const struct std_descr standards[] = { + { V4L2_STD_NTSC, "NTSC" }, + { V4L2_STD_NTSC_M, "NTSC-M" }, + { V4L2_STD_NTSC_M_JP, "NTSC-M-JP" }, + { V4L2_STD_NTSC_M_KR, "NTSC-M-KR" }, + { V4L2_STD_NTSC_443, "NTSC-443" }, + { V4L2_STD_PAL, "PAL" }, + { V4L2_STD_PAL_BG, "PAL-BG" }, + { V4L2_STD_PAL_B, "PAL-B" }, + { V4L2_STD_PAL_B1, "PAL-B1" }, + { V4L2_STD_PAL_G, "PAL-G" }, + { V4L2_STD_PAL_H, "PAL-H" }, + { V4L2_STD_PAL_I, "PAL-I" }, + { V4L2_STD_PAL_DK, "PAL-DK" }, + { V4L2_STD_PAL_D, "PAL-D" }, + { V4L2_STD_PAL_D1, "PAL-D1" }, + { V4L2_STD_PAL_K, "PAL-K" }, + { V4L2_STD_PAL_M, "PAL-M" }, + { V4L2_STD_PAL_N, "PAL-N" }, + { V4L2_STD_PAL_Nc, "PAL-Nc" }, + { V4L2_STD_PAL_60, "PAL-60" }, + { V4L2_STD_SECAM, "SECAM" }, + { V4L2_STD_SECAM_B, "SECAM-B" }, + { V4L2_STD_SECAM_G, "SECAM-G" }, + { V4L2_STD_SECAM_H, "SECAM-H" }, + { V4L2_STD_SECAM_DK, "SECAM-DK" }, + { V4L2_STD_SECAM_D, "SECAM-D" }, + { V4L2_STD_SECAM_K, "SECAM-K" }, + { V4L2_STD_SECAM_K1, "SECAM-K1" }, + { V4L2_STD_SECAM_L, "SECAM-L" }, + { V4L2_STD_SECAM_LC, "SECAM-Lc" }, + { 0, "Unknown" } +}; + /* video4linux standard ID conversion to standard name */ -char *v4l2_norm_to_name(v4l2_std_id id) +const char *v4l2_norm_to_name(v4l2_std_id id) { - char *name; u32 myid = id; + int i; /* HACK: ppc32 architecture doesn't have __ucmpdi2 function to handle 64 bit comparations. So, on that architecture, with some gcc @@ -64,110 +103,17 @@ char *v4l2_norm_to_name(v4l2_std_id id) */ BUG_ON(myid != id); - switch (myid) { - case V4L2_STD_PAL: - name = "PAL"; - break; - case V4L2_STD_PAL_BG: - name = "PAL-BG"; - break; - case V4L2_STD_PAL_DK: - name = "PAL-DK"; - break; - case V4L2_STD_PAL_B: - name = "PAL-B"; - break; - case V4L2_STD_PAL_B1: - name = "PAL-B1"; - break; - case V4L2_STD_PAL_G: - name = "PAL-G"; - break; - case V4L2_STD_PAL_H: - name = "PAL-H"; - break; - case V4L2_STD_PAL_I: - name = "PAL-I"; - break; - case V4L2_STD_PAL_D: - name = "PAL-D"; - break; - case V4L2_STD_PAL_D1: - name = "PAL-D1"; - break; - case V4L2_STD_PAL_K: - name = "PAL-K"; - break; - case V4L2_STD_PAL_M: - name = "PAL-M"; - break; - case V4L2_STD_PAL_N: - name = "PAL-N"; - break; - case V4L2_STD_PAL_Nc: - name = "PAL-Nc"; - break; - case V4L2_STD_PAL_60: - name = "PAL-60"; - break; - case V4L2_STD_NTSC: - name = "NTSC"; - break; - case V4L2_STD_NTSC_M: - name = "NTSC-M"; - break; - case V4L2_STD_NTSC_M_JP: - name = "NTSC-M-JP"; - break; - case V4L2_STD_NTSC_443: - name = "NTSC-443"; - break; - case V4L2_STD_NTSC_M_KR: - name = "NTSC-M-KR"; - break; - case V4L2_STD_SECAM: - name = "SECAM"; - break; - case V4L2_STD_SECAM_DK: - name = "SECAM-DK"; - break; - case V4L2_STD_SECAM_B: - name = "SECAM-B"; - break; - case V4L2_STD_SECAM_D: - name = "SECAM-D"; - break; - case V4L2_STD_SECAM_G: - name = "SECAM-G"; - break; - case V4L2_STD_SECAM_H: - name = "SECAM-H"; - break; - case V4L2_STD_SECAM_K: - name = "SECAM-K"; - break; - case V4L2_STD_SECAM_K1: - name = "SECAM-K1"; - break; - case V4L2_STD_SECAM_L: - name = "SECAM-L"; - break; - case V4L2_STD_SECAM_LC: - name = "SECAM-LC"; - break; - default: - name = "Unknown"; - break; - } - - return name; + for (i = 0; standards[i].std; i++) + if (myid == standards[i].std) + break; + return standards[i].descr; } EXPORT_SYMBOL(v4l2_norm_to_name); /* Fill in the fields of a v4l2_standard structure according to the 'id' and 'transmission' parameters. Returns negative on error. */ int v4l2_video_std_construct(struct v4l2_standard *vs, - int id, char *name) + int id, const char *name) { u32 index = vs->index; @@ -1218,95 +1164,40 @@ static int __video_do_ioctl(struct inode *inode, struct file *file, case VIDIOC_ENUMSTD: { struct v4l2_standard *p = arg; - v4l2_std_id id = vfd->tvnorms,curr_id=0; - unsigned int index = p->index,i; - - if (index<0) { - ret=-EINVAL; - break; - } - - /* Return norm array on a canonical way */ - for (i=0;i<= index && id; i++) { - if ( (id & V4L2_STD_PAL) == V4L2_STD_PAL) { - curr_id = V4L2_STD_PAL; - } else if ( (id & V4L2_STD_PAL_BG) == V4L2_STD_PAL_BG) { - curr_id = V4L2_STD_PAL_BG; - } else if ( (id & V4L2_STD_PAL_DK) == V4L2_STD_PAL_DK) { - curr_id = V4L2_STD_PAL_DK; - } else if ( (id & V4L2_STD_PAL_B) == V4L2_STD_PAL_B) { - curr_id = V4L2_STD_PAL_B; - } else if ( (id & V4L2_STD_PAL_B1) == V4L2_STD_PAL_B1) { - curr_id = V4L2_STD_PAL_B1; - } else if ( (id & V4L2_STD_PAL_G) == V4L2_STD_PAL_G) { - curr_id = V4L2_STD_PAL_G; - } else if ( (id & V4L2_STD_PAL_H) == V4L2_STD_PAL_H) { - curr_id = V4L2_STD_PAL_H; - } else if ( (id & V4L2_STD_PAL_I) == V4L2_STD_PAL_I) { - curr_id = V4L2_STD_PAL_I; - } else if ( (id & V4L2_STD_PAL_D) == V4L2_STD_PAL_D) { - curr_id = V4L2_STD_PAL_D; - } else if ( (id & V4L2_STD_PAL_D1) == V4L2_STD_PAL_D1) { - curr_id = V4L2_STD_PAL_D1; - } else if ( (id & V4L2_STD_PAL_K) == V4L2_STD_PAL_K) { - curr_id = V4L2_STD_PAL_K; - } else if ( (id & V4L2_STD_PAL_M) == V4L2_STD_PAL_M) { - curr_id = V4L2_STD_PAL_M; - } else if ( (id & V4L2_STD_PAL_N) == V4L2_STD_PAL_N) { - curr_id = V4L2_STD_PAL_N; - } else if ( (id & V4L2_STD_PAL_Nc) == V4L2_STD_PAL_Nc) { - curr_id = V4L2_STD_PAL_Nc; - } else if ( (id & V4L2_STD_PAL_60) == V4L2_STD_PAL_60) { - curr_id = V4L2_STD_PAL_60; - } else if ( (id & V4L2_STD_NTSC) == V4L2_STD_NTSC) { - curr_id = V4L2_STD_NTSC; - } else if ( (id & V4L2_STD_NTSC_M) == V4L2_STD_NTSC_M) { - curr_id = V4L2_STD_NTSC_M; - } else if ( (id & V4L2_STD_NTSC_M_JP) == V4L2_STD_NTSC_M_JP) { - curr_id = V4L2_STD_NTSC_M_JP; - } else if ( (id & V4L2_STD_NTSC_443) == V4L2_STD_NTSC_443) { - curr_id = V4L2_STD_NTSC_443; - } else if ( (id & V4L2_STD_NTSC_M_KR) == V4L2_STD_NTSC_M_KR) { - curr_id = V4L2_STD_NTSC_M_KR; - } else if ( (id & V4L2_STD_SECAM) == V4L2_STD_SECAM) { - curr_id = V4L2_STD_SECAM; - } else if ( (id & V4L2_STD_SECAM_DK) == V4L2_STD_SECAM_DK) { - curr_id = V4L2_STD_SECAM_DK; - } else if ( (id & V4L2_STD_SECAM_B) == V4L2_STD_SECAM_B) { - curr_id = V4L2_STD_SECAM_B; - } else if ( (id & V4L2_STD_SECAM_D) == V4L2_STD_SECAM_D) { - curr_id = V4L2_STD_SECAM_D; - } else if ( (id & V4L2_STD_SECAM_G) == V4L2_STD_SECAM_G) { - curr_id = V4L2_STD_SECAM_G; - } else if ( (id & V4L2_STD_SECAM_H) == V4L2_STD_SECAM_H) { - curr_id = V4L2_STD_SECAM_H; - } else if ( (id & V4L2_STD_SECAM_K) == V4L2_STD_SECAM_K) { - curr_id = V4L2_STD_SECAM_K; - } else if ( (id & V4L2_STD_SECAM_K1) == V4L2_STD_SECAM_K1) { - curr_id = V4L2_STD_SECAM_K1; - } else if ( (id & V4L2_STD_SECAM_L) == V4L2_STD_SECAM_L) { - curr_id = V4L2_STD_SECAM_L; - } else if ( (id & V4L2_STD_SECAM_LC) == V4L2_STD_SECAM_LC) { - curr_id = V4L2_STD_SECAM_LC; - } else { + v4l2_std_id id = vfd->tvnorms, curr_id = 0; + unsigned int index = p->index, i, j = 0; + const char *descr = ""; + + /* Return norm array in a canonical way */ + for (i = 0; i <= index && id; i++) { + /* last std value in the standards array is 0, so this + while always ends there since (id & 0) == 0. */ + while ((id & standards[j].std) != standards[j].std) + j++; + curr_id = standards[j].std; + descr = standards[j].descr; + j++; + if (curr_id == 0) break; - } - id &= ~curr_id; + if (curr_id != V4L2_STD_PAL && + curr_id != V4L2_STD_SECAM && + curr_id != V4L2_STD_NTSC) + id &= ~curr_id; } - if (i<=index) + if (i <= index) return -EINVAL; - v4l2_video_std_construct(p, curr_id,v4l2_norm_to_name(curr_id)); + v4l2_video_std_construct(p, curr_id, descr); p->index = index; - dbgarg (cmd, "index=%d, id=%Ld, name=%s, fps=%d/%d, " + dbgarg(cmd, "index=%d, id=%Ld, name=%s, fps=%d/%d, " "framelines=%d\n", p->index, (unsigned long long)p->id, p->name, p->frameperiod.numerator, p->frameperiod.denominator, p->framelines); - ret=0; + ret = 0; break; } case VIDIOC_G_STD: diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c index 845be1864f68..5ff9a58b6135 100644 --- a/drivers/media/video/vivi.c +++ b/drivers/media/video/vivi.c @@ -327,13 +327,14 @@ static void vivi_fillbuff(struct vivi_dev *dev, struct vivi_buffer *buf) int hmax = buf->vb.height; int wmax = buf->vb.width; struct timeval ts; - char *tmpbuf = kmalloc(wmax * 2, GFP_ATOMIC); + char *tmpbuf; void *vbuf = videobuf_to_vmalloc(&buf->vb); - if (!tmpbuf) + if (!vbuf) return; - if (!vbuf) + tmpbuf = kmalloc(wmax * 2, GFP_ATOMIC); + if (!tmpbuf) return; for (h = 0; h < hmax; h++) { diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 65210fca37ed..d89475d36988 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -114,6 +114,7 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) unsigned int nob = data->blocks; unsigned long long clks; unsigned int timeout; + bool dalgn = 0; u32 dcmd; int i; @@ -152,6 +153,9 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) host->sg_cpu[i].dcmd = dcmd | length; if (length & 31 && !(data->flags & MMC_DATA_READ)) host->sg_cpu[i].dcmd |= DCMD_ENDIRQEN; + /* Not aligned to 8-byte boundary? */ + if (sg_dma_address(&data->sg[i]) & 0x7) + dalgn = 1; if (data->flags & MMC_DATA_READ) { host->sg_cpu[i].dsadr = host->res->start + MMC_RXFIFO; host->sg_cpu[i].dtadr = sg_dma_address(&data->sg[i]); @@ -165,6 +169,15 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) host->sg_cpu[host->dma_len - 1].ddadr = DDADR_STOP; wmb(); + /* + * The PXA27x DMA controller encounters overhead when working with + * unaligned (to 8-byte boundaries) data, so switch on byte alignment + * mode only if we have unaligned data. + */ + if (dalgn) + DALGN |= (1 << host->dma); + else + DALGN &= (1 << host->dma); DDADR(host->dma) = host->sg_dma; DCSR(host->dma) = DCSR_RUN; } diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 07c2048b230b..b413aa6c246b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -55,6 +55,10 @@ static unsigned int debug_quirks = 0; #define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<7) /* Controller needs to be reset after each request to stay stable */ #define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<8) +/* Controller needs voltage and power writes to happen separately */ +#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<9) +/* Controller has an off-by-one issue with timeout value */ +#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1<<10) static const struct pci_device_id pci_ids[] __devinitdata = { { @@ -115,7 +119,8 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS, + SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | + SDHCI_QUIRK_BROKEN_DMA, }, { @@ -124,7 +129,17 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS, + SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | + SDHCI_QUIRK_BROKEN_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_MARVELL, + .device = PCI_DEVICE_ID_MARVELL_CAFE_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | + SDHCI_QUIRK_INCR_TIMEOUT_CONTROL, }, { @@ -469,6 +484,13 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) break; } + /* + * Compensate for an off-by-one error in the CaFe hardware; otherwise, + * a too-small count gives us interrupt timeouts. + */ + if ((host->chip->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL)) + count++; + if (count >= 0xF) { printk(KERN_WARNING "%s: Too large timeout requested!\n", mmc_hostname(host->mmc)); @@ -774,6 +796,14 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) BUG(); } + /* + * At least the CaFe chip gets confused if we set the voltage + * and set turn on power at the same time, so set the voltage first. + */ + if ((host->chip->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)) + writeb(pwr & ~SDHCI_POWER_ON, + host->ioaddr + SDHCI_POWER_CONTROL); + writeb(pwr, host->ioaddr + SDHCI_POWER_CONTROL); out: diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 2edda8cc7f99..aabad8ce7458 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -1768,9 +1768,10 @@ vortex_timer(unsigned long data) case XCVR_MII: case XCVR_NWAY: { ok = 1; - spin_lock_bh(&vp->lock); + /* Interrupts are already disabled */ + spin_lock(&vp->lock); vortex_check_media(dev, 0); - spin_unlock_bh(&vp->lock); + spin_unlock(&vp->lock); } break; default: /* Other media types handled by Tx timeouts. */ diff --git a/drivers/net/e100.c b/drivers/net/e100.c index f3cba5e24ec5..1037b1332312 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1803,6 +1803,8 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) if (rx->prev->skb) { struct rfd *prev_rfd = (struct rfd *)rx->prev->skb->data; put_unaligned_le32(rx->dma_addr, &prev_rfd->link); + pci_dma_sync_single_for_device(nic->pdev, rx->prev->dma_addr, + sizeof(struct rfd), PCI_DMA_TODEVICE); } return 0; diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c index 701531e72e7b..a3f6a9c72ec8 100644 --- a/drivers/net/e1000/e1000_ethtool.c +++ b/drivers/net/e1000/e1000_ethtool.c @@ -347,7 +347,7 @@ e1000_set_tso(struct net_device *netdev, u32 data) else netdev->features &= ~NETIF_F_TSO; - if (data) + if (data && (adapter->hw.mac_type > e1000_82547_rev_2)) netdev->features |= NETIF_F_TSO6; else netdev->features &= ~NETIF_F_TSO6; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index cab1835173cd..648a87bbf467 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -2535,7 +2535,8 @@ void e1000e_down(struct e1000_adapter *adapter) adapter->link_speed = 0; adapter->link_duplex = 0; - e1000e_reset(adapter); + if (!pci_channel_offline(adapter->pdev)) + e1000e_reset(adapter); e1000_clean_tx_ring(adapter); e1000_clean_rx_ring(adapter); diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index 0b94833e23f7..e8cfadefa4b6 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -1077,8 +1077,6 @@ static inline void rx_off(struct scc_priv *priv) static void start_timer(struct scc_priv *priv, int t, int r15) { - unsigned long flags; - outb(priv->tmr_mode, priv->tmr_ctrl); if (t == 0) { tm_isr(priv); diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index ae398f04c7b4..e79a26a886c8 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -718,7 +718,8 @@ void igb_down(struct igb_adapter *adapter) adapter->link_speed = 0; adapter->link_duplex = 0; - igb_reset(adapter); + if (!pci_channel_offline(adapter->pdev)) + igb_reset(adapter); igb_clean_all_tx_rings(adapter); igb_clean_all_rx_rings(adapter); } diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c index 679a0826780e..2c03f4e2ccc4 100644 --- a/drivers/net/ipg.c +++ b/drivers/net/ipg.c @@ -1271,7 +1271,7 @@ static void ipg_nic_rx_with_end(struct net_device *dev, framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN; - endframeLen = framelen - jumbo->current_size; + endframelen = framelen - jumbo->current_size; /* if (framelen > IPG_RXFRAG_SIZE) framelen=IPG_RXFRAG_SIZE; @@ -1279,8 +1279,8 @@ static void ipg_nic_rx_with_end(struct net_device *dev, if (framelen > IPG_RXSUPPORT_SIZE) dev_kfree_skb_irq(jumbo->skb); else { - memcpy(skb_put(jumbo->skb, endframeLen), - skb->data, endframeLen); + memcpy(skb_put(jumbo->skb, endframelen), + skb->data, endframelen); jumbo->skb->protocol = eth_type_trans(jumbo->skb, dev); @@ -1352,16 +1352,16 @@ static int ipg_nic_rx(struct net_device *dev) switch (ipg_nic_rx_check_frame_type(dev)) { case FRAME_WITH_START_WITH_END: - ipg_nic_rx_with_start_and_end(dev, tp, rxfd, entry); + ipg_nic_rx_with_start_and_end(dev, sp, rxfd, entry); break; case FRAME_WITH_START: - ipg_nic_rx_with_start(dev, tp, rxfd, entry); + ipg_nic_rx_with_start(dev, sp, rxfd, entry); break; case FRAME_WITH_END: - ipg_nic_rx_with_end(dev, tp, rxfd, entry); + ipg_nic_rx_with_end(dev, sp, rxfd, entry); break; case FRAME_NO_START_NO_END: - ipg_nic_rx_no_start_no_end(dev, tp, rxfd, entry); + ipg_nic_rx_no_start_no_end(dev, sp, rxfd, entry); break; } } @@ -1808,7 +1808,7 @@ static int ipg_nic_open(struct net_device *dev) /* initialize JUMBO Frame control variable */ sp->jumbo.found_start = 0; sp->jumbo.current_size = 0; - sp->jumbo.skb = 0; + sp->jumbo.skb = NULL; dev->mtu = IPG_TXFRAG_SIZE; #endif diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 7b859220c255..8f0460901153 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1969,7 +1969,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) netif_carrier_off(netdev); netif_stop_queue(netdev); - ixgbe_reset(adapter); + if (!pci_channel_offline(adapter->pdev)) + ixgbe_reset(adapter); ixgbe_clean_all_tx_rings(adapter); ixgbe_clean_all_rx_rings(adapter); diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 6797ed069f1f..63cd67b931e7 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -71,14 +71,18 @@ static irqreturn_t netxen_intr(int irq, void *data); static irqreturn_t netxen_msi_intr(int irq, void *data); /* PCI Device ID Table */ +#define ENTRY(device) \ + {PCI_DEVICE(0x4040, (device)), \ + .class = PCI_CLASS_NETWORK_ETHERNET << 8, .class_mask = ~0} + static struct pci_device_id netxen_pci_tbl[] __devinitdata = { - {PCI_DEVICE(0x4040, 0x0001), PCI_DEVICE_CLASS(0x020000, ~0)}, - {PCI_DEVICE(0x4040, 0x0002), PCI_DEVICE_CLASS(0x020000, ~0)}, - {PCI_DEVICE(0x4040, 0x0003), PCI_DEVICE_CLASS(0x020000, ~0)}, - {PCI_DEVICE(0x4040, 0x0004), PCI_DEVICE_CLASS(0x020000, ~0)}, - {PCI_DEVICE(0x4040, 0x0005), PCI_DEVICE_CLASS(0x020000, ~0)}, - {PCI_DEVICE(0x4040, 0x0024), PCI_DEVICE_CLASS(0x020000, ~0)}, - {PCI_DEVICE(0x4040, 0x0025), PCI_DEVICE_CLASS(0x020000, ~0)}, + ENTRY(0x0001), + ENTRY(0x0002), + ENTRY(0x0003), + ENTRY(0x0004), + ENTRY(0x0005), + ENTRY(0x0024), + ENTRY(0x0025), {0,} }; diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index ce95c5d168fe..70d012e90dcf 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -525,12 +525,14 @@ static int axnet_open(struct net_device *dev) int ret; axnet_dev_t *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; + unsigned int nic_base = dev->base_addr; DEBUG(2, "axnet_open('%s')\n", dev->name); if (!pcmcia_dev_present(link)) return -ENODEV; + outb_p(0xFF, nic_base + EN0_ISR); /* Clear bogus intr. */ ret = request_irq(dev->irq, ei_irq_wrapper, IRQF_SHARED, "axnet_cs", dev); if (ret) return ret; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index fd8158a86f64..2d4c4ad89b8d 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -969,6 +969,7 @@ static int pcnet_open(struct net_device *dev) int ret; pcnet_dev_t *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; + unsigned int nic_base = dev->base_addr; DEBUG(2, "pcnet_open('%s')\n", dev->name); @@ -976,6 +977,8 @@ static int pcnet_open(struct net_device *dev) return -ENODEV; set_misc_reg(dev); + + outb_p(0xFF, nic_base + EN0_ISR); /* Clear bogus intr. */ ret = request_irq(dev->irq, ei_irq_wrapper, IRQF_SHARED, dev_info, dev); if (ret) return ret; diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index b7f7b2227d56..bccee68bd48a 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -3701,7 +3701,9 @@ static int ql_cycle_adapter(struct ql3_adapter *qdev, int reset) printk(KERN_ERR PFX "%s: Driver up/down cycle failed, " "closing device\n",qdev->ndev->name); + rtnl_lock(); dev_close(qdev->ndev); + rtnl_unlock(); return -1; } return 0; diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 858b191517b3..504a48ff73c8 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -273,7 +273,7 @@ static void r6040_init_ring_desc(struct r6040_descriptor *desc_ring, dma_addr_t mapping = desc_dma; while (size-- > 0) { - mapping += sizeof(sizeof(*desc)); + mapping += sizeof(*desc); desc->ndesc = cpu_to_le32(mapping); desc->vndescp = desc + 1; desc++; diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index b5c1e663417d..ae7b697456b4 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2625,9 +2625,7 @@ static int fill_rx_buffers(struct ring_info *ring) rxdp1->Buffer0_ptr = pci_map_single (ring->pdev, skb->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if( (rxdp1->Buffer0_ptr == 0) || - (rxdp1->Buffer0_ptr == - DMA_ERROR_CODE)) + if(pci_dma_mapping_error(rxdp1->Buffer0_ptr)) goto pci_map_failed; rxdp->Control_2 = @@ -2657,6 +2655,7 @@ static int fill_rx_buffers(struct ring_info *ring) skb->data = (void *) (unsigned long)tmp; skb_reset_tail_pointer(skb); + /* AK: check is wrong. 0 can be valid dma address */ if (!(rxdp3->Buffer0_ptr)) rxdp3->Buffer0_ptr = pci_map_single(ring->pdev, ba->ba_0, @@ -2665,8 +2664,7 @@ static int fill_rx_buffers(struct ring_info *ring) pci_dma_sync_single_for_device(ring->pdev, (dma_addr_t) rxdp3->Buffer0_ptr, BUF0_LEN, PCI_DMA_FROMDEVICE); - if( (rxdp3->Buffer0_ptr == 0) || - (rxdp3->Buffer0_ptr == DMA_ERROR_CODE)) + if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) goto pci_map_failed; rxdp->Control_2 = SET_BUFFER0_SIZE_3(BUF0_LEN); @@ -2681,18 +2679,17 @@ static int fill_rx_buffers(struct ring_info *ring) (ring->pdev, skb->data, ring->mtu + 4, PCI_DMA_FROMDEVICE); - if( (rxdp3->Buffer2_ptr == 0) || - (rxdp3->Buffer2_ptr == DMA_ERROR_CODE)) + if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) goto pci_map_failed; + /* AK: check is wrong */ if (!rxdp3->Buffer1_ptr) rxdp3->Buffer1_ptr = pci_map_single(ring->pdev, ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if( (rxdp3->Buffer1_ptr == 0) || - (rxdp3->Buffer1_ptr == DMA_ERROR_CODE)) { + if (pci_dma_mapping_error(rxdp3->Buffer1_ptr)) { pci_unmap_single (ring->pdev, (dma_addr_t)(unsigned long) @@ -4264,16 +4261,14 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Buffer_Pointer = pci_map_single(sp->pdev, fifo->ufo_in_band_v, sizeof(u64), PCI_DMA_TODEVICE); - if((txdp->Buffer_Pointer == 0) || - (txdp->Buffer_Pointer == DMA_ERROR_CODE)) + if (pci_dma_mapping_error(txdp->Buffer_Pointer)) goto pci_map_failed; txdp++; } txdp->Buffer_Pointer = pci_map_single (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE); - if((txdp->Buffer_Pointer == 0) || - (txdp->Buffer_Pointer == DMA_ERROR_CODE)) + if (pci_dma_mapping_error(txdp->Buffer_Pointer)) goto pci_map_failed; txdp->Host_Control = (unsigned long) skb; @@ -6884,10 +6879,8 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single( sp->pdev, (*skb)->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if( (rxdp1->Buffer0_ptr == 0) || - (rxdp1->Buffer0_ptr == DMA_ERROR_CODE)) { + if (pci_dma_mapping_error(rxdp1->Buffer0_ptr)) goto memalloc_failed; - } rxdp->Host_Control = (unsigned long) (*skb); } } else if ((sp->rxd_mode == RXD_MODE_3B) && (rxdp->Host_Control == 0)) { @@ -6913,15 +6906,12 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single(sp->pdev, (*skb)->data, dev->mtu + 4, PCI_DMA_FROMDEVICE); - if( (rxdp3->Buffer2_ptr == 0) || - (rxdp3->Buffer2_ptr == DMA_ERROR_CODE)) { + if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) goto memalloc_failed; - } rxdp3->Buffer0_ptr = *temp0 = pci_map_single( sp->pdev, ba->ba_0, BUF0_LEN, PCI_DMA_FROMDEVICE); - if( (rxdp3->Buffer0_ptr == 0) || - (rxdp3->Buffer0_ptr == DMA_ERROR_CODE)) { + if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer2_ptr, dev->mtu + 4, PCI_DMA_FROMDEVICE); @@ -6933,8 +6923,7 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, rxdp3->Buffer1_ptr = *temp1 = pci_map_single(sp->pdev, ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if( (rxdp3->Buffer1_ptr == 0) || - (rxdp3->Buffer1_ptr == DMA_ERROR_CODE)) { + if (pci_dma_mapping_error(rxdp3->Buffer1_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer0_ptr, BUF0_LEN, PCI_DMA_FROMDEVICE); diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 4706f7f9acb6..1827b6686c98 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -75,10 +75,6 @@ static int debug_level = ERR_DBG; /* DEBUG message print. */ #define DBG_PRINT(dbg_level, args...) if(!(debug_level<dbg_level)) printk(args) -#ifndef DMA_ERROR_CODE -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) -#endif - /* Protocol assist features of the NIC */ #define L3_CKSUM_OK 0xFFFF #define L4_CKSUM_OK 0xFFFF diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index 10e4e85da3fc..b07b8cbadeaf 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -1394,6 +1394,7 @@ tc35815_open(struct net_device *dev) tc35815_chip_init(dev); spin_unlock_irq(&lp->lock); + netif_carrier_off(dev); /* schedule a link state check */ phy_start(lp->phy_dev); @@ -1735,7 +1736,6 @@ tc35815_rx(struct net_device *dev) skb = lp->rx_skbs[cur_bd].skb; prefetch(skb->data); lp->rx_skbs[cur_bd].skb = NULL; - lp->fbl_count--; pci_unmap_single(lp->pci_dev, lp->rx_skbs[cur_bd].skb_dma, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); @@ -1791,6 +1791,7 @@ tc35815_rx(struct net_device *dev) #ifdef TC35815_USE_PACKEDBUFFER while (lp->fbl_curid != id) #else + lp->fbl_count--; while (lp->fbl_count < RX_BUF_NUM) #endif { @@ -2453,6 +2454,7 @@ static int tc35815_resume(struct pci_dev *pdev) return 0; pci_set_power_state(pdev, PCI_D0); tc35815_restart(dev); + netif_carrier_off(dev); if (lp->phy_dev) phy_start(lp->phy_dev); netif_device_attach(dev); diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 249e18053d5f..069f8bb0a99f 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -32,6 +32,7 @@ #include <linux/x25.h> #include <linux/lapb.h> #include <linux/init.h> +#include <linux/rtnetlink.h> #include "x25_asy.h" #include <net/x25device.h> @@ -601,8 +602,10 @@ static void x25_asy_close_tty(struct tty_struct *tty) if (!sl || sl->magic != X25_ASY_MAGIC) return; + rtnl_lock(); if (sl->dev->flags & IFF_UP) dev_close(sl->dev); + rtnl_unlock(); tty->disc_data = NULL; sl->tty = NULL; diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c index 36a9c42df835..76f4c7bad8b8 100644 --- a/drivers/net/wireless/b43/leds.c +++ b/drivers/net/wireless/b43/leds.c @@ -72,6 +72,9 @@ static void b43_led_brightness_set(struct led_classdev *led_dev, struct b43_wldev *dev = led->dev; bool radio_enabled; + if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED)) + return; + /* Checking the radio-enabled status here is slightly racy, * but we want to avoid the locking overhead and we don't care * whether the LED has the wrong state for a second. */ diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index fa4b0d8b74a2..a70827793086 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -2883,12 +2883,11 @@ static int b43_op_tx(struct ieee80211_hw *hw, if (unlikely(skb->len < 2 + 2 + 6)) { /* Too short, this can't be a valid frame. */ - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; + goto drop_packet; } B43_WARN_ON(skb_shinfo(skb)->nr_frags); if (unlikely(!dev)) - return NETDEV_TX_BUSY; + goto drop_packet; /* Transmissions on seperate queues can run concurrently. */ read_lock_irqsave(&wl->tx_lock, flags); @@ -2904,7 +2903,12 @@ static int b43_op_tx(struct ieee80211_hw *hw, read_unlock_irqrestore(&wl->tx_lock, flags); if (unlikely(err)) - return NETDEV_TX_BUSY; + goto drop_packet; + return NETDEV_TX_OK; + +drop_packet: + /* We can not transmit this packet. Drop it. */ + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c index c990f87b107a..93ddc1cbcc8b 100644 --- a/drivers/net/wireless/b43legacy/dma.c +++ b/drivers/net/wireless/b43legacy/dma.c @@ -876,6 +876,7 @@ struct b43legacy_dmaring *b43legacy_setup_dmaring(struct b43legacy_wldev *dev, if (!ring) goto out; ring->type = type; + ring->dev = dev; nr_slots = B43legacy_RXRING_SLOTS; if (for_tx) @@ -922,7 +923,6 @@ struct b43legacy_dmaring *b43legacy_setup_dmaring(struct b43legacy_wldev *dev, DMA_TO_DEVICE); } - ring->dev = dev; ring->nr_slots = nr_slots; ring->mmio_base = b43legacy_dmacontroller_base(type, controller_index); ring->index = controller_index; diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 204077c13870..3e612d0a13e8 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2378,8 +2378,10 @@ static int b43legacy_op_tx(struct ieee80211_hw *hw, } else err = b43legacy_dma_tx(dev, skb, ctl); out: - if (unlikely(err)) - return NETDEV_TX_BUSY; + if (unlikely(err)) { + /* Drop the packet. */ + dev_kfree_skb_any(skb); + } return NETDEV_TX_OK; } diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c index 4fd73809602e..020f450e9dba 100644 --- a/drivers/net/wireless/hostap/hostap_80211_rx.c +++ b/drivers/net/wireless/hostap/hostap_80211_rx.c @@ -64,7 +64,7 @@ int prism2_rx_80211(struct net_device *dev, struct sk_buff *skb, int hdrlen, phdrlen, head_need, tail_need; u16 fc; int prism_header, ret; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr_4addr *fhdr; iface = netdev_priv(dev); local = iface->local; @@ -83,8 +83,8 @@ int prism2_rx_80211(struct net_device *dev, struct sk_buff *skb, phdrlen = 0; } - hdr = (struct ieee80211_hdr_4addr *) skb->data; - fc = le16_to_cpu(hdr->frame_ctl); + fhdr = (struct ieee80211_hdr_4addr *) skb->data; + fc = le16_to_cpu(fhdr->frame_ctl); if (type == PRISM2_RX_MGMT && (fc & IEEE80211_FCTL_VERS)) { printk(KERN_DEBUG "%s: dropped management frame with header " @@ -551,7 +551,7 @@ hostap_rx_frame_wds(local_info_t *local, struct ieee80211_hdr_4addr *hdr, hdr->addr1[2] != 0xff || hdr->addr1[3] != 0xff || hdr->addr1[4] != 0xff || hdr->addr1[5] != 0xff)) { /* RA (or BSSID) is not ours - drop */ - PDEBUG(DEBUG_EXTRA, "%s: received WDS frame with " + PDEBUG(DEBUG_EXTRA2, "%s: received WDS frame with " "not own or broadcast %s=%s\n", local->dev->name, fc & IEEE80211_FCTL_FROMDS ? "RA" : "BSSID", diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index 0acd9589c48c..ab981afd481d 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -1930,7 +1930,7 @@ static void handle_pspoll(local_info_t *local, PDEBUG(DEBUG_PS, " PSPOLL and AID[15:14] not set\n"); return; } - aid &= ~BIT(15) & ~BIT(14); + aid &= ~(BIT(15) | BIT(14)); if (aid == 0 || aid > MAX_AID_TABLE_SIZE) { PDEBUG(DEBUG_PS, " invalid aid=%d\n", aid); return; diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index ed4317a17cbb..80039a0ae027 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -533,10 +533,10 @@ static void prism2_detach(struct pcmcia_device *link) do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) #define CFG_CHECK2(fn, retf) \ -do { int ret = (retf); \ -if (ret != 0) { \ - PDEBUG(DEBUG_EXTRA, "CardServices(" #fn ") returned %d\n", ret); \ - cs_error(link, fn, ret); \ +do { int _ret = (retf); \ +if (_ret != 0) { \ + PDEBUG(DEBUG_EXTRA, "CardServices(" #fn ") returned %d\n", _ret); \ + cs_error(link, fn, _ret); \ goto next_entry; \ } \ } while (0) diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index cdf90c40f11b..936f52e3d95c 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -2835,7 +2835,7 @@ static void hostap_passive_scan(unsigned long data) { local_info_t *local = (local_info_t *) data; struct net_device *dev = local->dev; - u16 channel; + u16 chan; if (local->passive_scan_interval <= 0) return; @@ -2872,11 +2872,11 @@ static void hostap_passive_scan(unsigned long data) printk(KERN_DEBUG "%s: passive scan channel %d\n", dev->name, local->passive_scan_channel); - channel = local->passive_scan_channel; + chan = local->passive_scan_channel; local->passive_scan_state = PASSIVE_SCAN_WAIT; local->passive_scan_timer.expires = jiffies + HZ / 10; } else { - channel = local->channel; + chan = local->channel; local->passive_scan_state = PASSIVE_SCAN_LISTEN; local->passive_scan_timer.expires = jiffies + local->passive_scan_interval * HZ; @@ -2884,9 +2884,9 @@ static void hostap_passive_scan(unsigned long data) if (hfa384x_cmd_callback(dev, HFA384X_CMDCODE_TEST | (HFA384X_TEST_CHANGE_CHANNEL << 8), - channel, NULL, 0)) + chan, NULL, 0)) printk(KERN_ERR "%s: passive scan channel set %d " - "failed\n", dev->name, channel); + "failed\n", dev->name, chan); add_timer(&local->passive_scan_timer); } diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index f7aec9309d04..a38e85f334df 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -594,7 +594,8 @@ void hostap_dump_tx_header(const char *name, const struct hfa384x_tx_frame *tx) } -int hostap_80211_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int hostap_80211_header_parse(const struct sk_buff *skb, + unsigned char *haddr) { struct hostap_interface *iface = netdev_priv(skb->dev); local_info_t *local = iface->local; @@ -857,7 +858,6 @@ const struct header_ops hostap_80211_ops = { .rebuild = eth_rebuild_header, .cache = eth_header_cache, .cache_update = eth_header_cache_update, - .parse = hostap_80211_header_parse, }; EXPORT_SYMBOL(hostap_80211_ops); @@ -1150,7 +1150,6 @@ EXPORT_SYMBOL(hostap_set_roaming); EXPORT_SYMBOL(hostap_set_auth_algs); EXPORT_SYMBOL(hostap_dump_rx_header); EXPORT_SYMBOL(hostap_dump_tx_header); -EXPORT_SYMBOL(hostap_80211_header_parse); EXPORT_SYMBOL(hostap_80211_get_hdrlen); EXPORT_SYMBOL(hostap_get_stats); EXPORT_SYMBOL(hostap_setup_dev); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 13925b627e3b..b1b3c523185d 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -2227,7 +2227,10 @@ static int iwl3945_scan_initiate(struct iwl3945_priv *priv) } IWL_DEBUG_INFO("Starting scan...\n"); - priv->scan_bands = 2; + if (priv->cfg->sku & IWL_SKU_G) + priv->scan_bands |= BIT(IEEE80211_BAND_2GHZ); + if (priv->cfg->sku & IWL_SKU_A) + priv->scan_bands |= BIT(IEEE80211_BAND_5GHZ); set_bit(STATUS_SCANNING, &priv->status); priv->scan_start = jiffies; priv->scan_pass_start = priv->scan_start; @@ -3352,13 +3355,18 @@ static void iwl3945_rx_scan_complete_notif(struct iwl3945_priv *priv, cancel_delayed_work(&priv->scan_check); IWL_DEBUG_INFO("Scan pass on %sGHz took %dms\n", - (priv->scan_bands == 2) ? "2.4" : "5.2", + (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) ? + "2.4" : "5.2", jiffies_to_msecs(elapsed_jiffies (priv->scan_pass_start, jiffies))); - /* Remove this scanned band from the list - * of pending bands to scan */ - priv->scan_bands--; + /* Remove this scanned band from the list of pending + * bands to scan, band G precedes A in order of scanning + * as seen in iwl3945_bg_request_scan */ + if (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) + priv->scan_bands &= ~BIT(IEEE80211_BAND_2GHZ); + else if (priv->scan_bands & BIT(IEEE80211_BAND_5GHZ)) + priv->scan_bands &= ~BIT(IEEE80211_BAND_5GHZ); /* If a request to abort was given, or the scan did not succeed * then we reset the scan state machine and terminate, @@ -4972,7 +4980,7 @@ static int iwl3945_get_channels_for_scan(struct iwl3945_priv *priv, ch_info = iwl3945_get_channel_info(priv, band, scan_ch->channel); if (!is_channel_valid(ch_info)) { - IWL_DEBUG_SCAN("Channel %d is INVALID for this SKU.\n", + IWL_DEBUG_SCAN("Channel %d is INVALID for this band.\n", scan_ch->channel); continue; } @@ -6315,21 +6323,16 @@ static void iwl3945_bg_request_scan(struct work_struct *data) /* flags + rate selection */ - switch (priv->scan_bands) { - case 2: + if (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) { scan->flags = RXON_FLG_BAND_24G_MSK | RXON_FLG_AUTO_DETECT_MSK; scan->tx_cmd.rate = IWL_RATE_1M_PLCP; scan->good_CRC_th = 0; band = IEEE80211_BAND_2GHZ; - break; - - case 1: + } else if (priv->scan_bands & BIT(IEEE80211_BAND_5GHZ)) { scan->tx_cmd.rate = IWL_RATE_6M_PLCP; scan->good_CRC_th = IWL_GOOD_CRC_TH; band = IEEE80211_BAND_5GHZ; - break; - - default: + } else { IWL_WARNING("Invalid scan band count\n"); goto done; } @@ -6770,7 +6773,7 @@ static int iwl3945_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *co ch_info = iwl3945_get_channel_info(priv, conf->channel->band, conf->channel->hw_value); if (!is_channel_valid(ch_info)) { - IWL_DEBUG_SCAN("Channel %d [%d] is INVALID for this SKU.\n", + IWL_DEBUG_SCAN("Channel %d [%d] is INVALID for this band.\n", conf->channel->hw_value, conf->channel->band); IWL_DEBUG_MAC80211("leave - invalid channel\n"); spin_unlock_irqrestore(&priv->lock, flags); diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c index 883b42f7e998..5ed16ce78468 100644 --- a/drivers/net/wireless/iwlwifi/iwl4965-base.c +++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c @@ -1774,7 +1774,10 @@ static int iwl4965_scan_initiate(struct iwl_priv *priv) } IWL_DEBUG_INFO("Starting scan...\n"); - priv->scan_bands = 2; + if (priv->cfg->sku & IWL_SKU_G) + priv->scan_bands |= BIT(IEEE80211_BAND_2GHZ); + if (priv->cfg->sku & IWL_SKU_A) + priv->scan_bands |= BIT(IEEE80211_BAND_5GHZ); set_bit(STATUS_SCANNING, &priv->status); priv->scan_start = jiffies; priv->scan_pass_start = priv->scan_start; @@ -3023,8 +3026,9 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv, IWL_DEBUG_TX_REPLY("Tx queue reclaim %d\n", index); if (index != -1) { - int freed = iwl4965_tx_queue_reclaim(priv, txq_id, index); #ifdef CONFIG_IWL4965_HT + int freed = iwl4965_tx_queue_reclaim(priv, txq_id, index); + if (tid != MAX_TID_COUNT) priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; if (iwl4965_queue_space(&txq->q) > txq->q.low_mark && @@ -3276,13 +3280,18 @@ static void iwl4965_rx_scan_complete_notif(struct iwl_priv *priv, cancel_delayed_work(&priv->scan_check); IWL_DEBUG_INFO("Scan pass on %sGHz took %dms\n", - (priv->scan_bands == 2) ? "2.4" : "5.2", + (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) ? + "2.4" : "5.2", jiffies_to_msecs(elapsed_jiffies (priv->scan_pass_start, jiffies))); - /* Remove this scanned band from the list - * of pending bands to scan */ - priv->scan_bands--; + /* Remove this scanned band from the list of pending + * bands to scan, band G precedes A in order of scanning + * as seen in iwl_bg_request_scan */ + if (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) + priv->scan_bands &= ~BIT(IEEE80211_BAND_2GHZ); + else if (priv->scan_bands & BIT(IEEE80211_BAND_5GHZ)) + priv->scan_bands &= ~BIT(IEEE80211_BAND_5GHZ); /* If a request to abort was given, or the scan did not succeed * then we reset the scan state machine and terminate, @@ -3292,7 +3301,7 @@ static void iwl4965_rx_scan_complete_notif(struct iwl_priv *priv, clear_bit(STATUS_SCAN_ABORTING, &priv->status); } else { /* If there are more bands on this scan pass reschedule */ - if (priv->scan_bands > 0) + if (priv->scan_bands) goto reschedule; } @@ -4635,10 +4644,9 @@ static int iwl4965_get_channels_for_scan(struct iwl_priv *priv, scan_ch->channel = ieee80211_frequency_to_channel(channels[i].center_freq); - ch_info = iwl_get_channel_info(priv, band, - scan_ch->channel); + ch_info = iwl_get_channel_info(priv, band, scan_ch->channel); if (!is_channel_valid(ch_info)) { - IWL_DEBUG_SCAN("Channel %d is INVALID for this SKU.\n", + IWL_DEBUG_SCAN("Channel %d is INVALID for this band.\n", scan_ch->channel); continue; } @@ -5830,8 +5838,7 @@ static void iwl4965_bg_request_scan(struct work_struct *data) scan->tx_cmd.stop_time.life_time = TX_CMD_LIFE_TIME_INFINITE; - switch (priv->scan_bands) { - case 2: + if (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) { scan->flags = RXON_FLG_BAND_24G_MSK | RXON_FLG_AUTO_DETECT_MSK; scan->tx_cmd.rate_n_flags = iwl4965_hw_set_rate_n_flags(IWL_RATE_1M_PLCP, @@ -5839,17 +5846,13 @@ static void iwl4965_bg_request_scan(struct work_struct *data) scan->good_CRC_th = 0; band = IEEE80211_BAND_2GHZ; - break; - - case 1: + } else if (priv->scan_bands & BIT(IEEE80211_BAND_5GHZ)) { scan->tx_cmd.rate_n_flags = iwl4965_hw_set_rate_n_flags(IWL_RATE_6M_PLCP, RATE_MCS_ANT_B_MSK); scan->good_CRC_th = IWL_GOOD_CRC_TH; band = IEEE80211_BAND_5GHZ; - break; - - default: + } else { IWL_WARNING("Invalid scan band count\n"); goto done; } diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c index 762e85bef55d..e43bae97ed8f 100644 --- a/drivers/net/wireless/prism54/islpci_eth.c +++ b/drivers/net/wireless/prism54/islpci_eth.c @@ -290,7 +290,7 @@ islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb) avs->version = cpu_to_be32(P80211CAPTURE_VERSION); avs->length = cpu_to_be32(sizeof (struct avs_80211_1_header)); - avs->mactime = cpu_to_be64(le64_to_cpu(clock)); + avs->mactime = cpu_to_be64(clock); avs->hosttime = cpu_to_be64(jiffies); avs->phytype = cpu_to_be32(6); /*OFDM: 6 for (g), 8 for (a) */ avs->channel = cpu_to_be32(channel_of_freq(freq)); diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index fdbd0ef2be4b..61e59c17a60a 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -138,11 +138,8 @@ static void rt2500usb_bbp_write(struct rt2x00_dev *rt2x00dev, * Wait until the BBP becomes ready. */ reg = rt2500usb_bbp_check(rt2x00dev); - if (rt2x00_get_field16(reg, PHY_CSR8_BUSY)) { - ERROR(rt2x00dev, "PHY_CSR8 register busy. Write failed.\n"); - mutex_unlock(&rt2x00dev->usb_cache_mutex); - return; - } + if (rt2x00_get_field16(reg, PHY_CSR8_BUSY)) + goto exit_fail; /* * Write the data into the BBP. @@ -155,6 +152,13 @@ static void rt2500usb_bbp_write(struct rt2x00_dev *rt2x00dev, rt2500usb_register_write_lock(rt2x00dev, PHY_CSR7, reg); mutex_unlock(&rt2x00dev->usb_cache_mutex); + + return; + +exit_fail: + mutex_unlock(&rt2x00dev->usb_cache_mutex); + + ERROR(rt2x00dev, "PHY_CSR8 register busy. Write failed.\n"); } static void rt2500usb_bbp_read(struct rt2x00_dev *rt2x00dev, @@ -168,10 +172,8 @@ static void rt2500usb_bbp_read(struct rt2x00_dev *rt2x00dev, * Wait until the BBP becomes ready. */ reg = rt2500usb_bbp_check(rt2x00dev); - if (rt2x00_get_field16(reg, PHY_CSR8_BUSY)) { - ERROR(rt2x00dev, "PHY_CSR8 register busy. Read failed.\n"); - return; - } + if (rt2x00_get_field16(reg, PHY_CSR8_BUSY)) + goto exit_fail; /* * Write the request into the BBP. @@ -186,17 +188,21 @@ static void rt2500usb_bbp_read(struct rt2x00_dev *rt2x00dev, * Wait until the BBP becomes ready. */ reg = rt2500usb_bbp_check(rt2x00dev); - if (rt2x00_get_field16(reg, PHY_CSR8_BUSY)) { - ERROR(rt2x00dev, "PHY_CSR8 register busy. Read failed.\n"); - *value = 0xff; - mutex_unlock(&rt2x00dev->usb_cache_mutex); - return; - } + if (rt2x00_get_field16(reg, PHY_CSR8_BUSY)) + goto exit_fail; rt2500usb_register_read_lock(rt2x00dev, PHY_CSR7, ®); *value = rt2x00_get_field16(reg, PHY_CSR7_DATA); mutex_unlock(&rt2x00dev->usb_cache_mutex); + + return; + +exit_fail: + mutex_unlock(&rt2x00dev->usb_cache_mutex); + + ERROR(rt2x00dev, "PHY_CSR8 register busy. Read failed.\n"); + *value = 0xff; } static void rt2500usb_rf_write(struct rt2x00_dev *rt2x00dev, diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 611d98320593..b4bf1e09cf9a 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -821,6 +821,7 @@ struct rt2x00_dev { /* * Scheduled work. */ + struct workqueue_struct *workqueue; struct work_struct intf_work; struct work_struct filter_work; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 2673d568bcac..c997d4f28ab3 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -75,7 +75,7 @@ static void rt2x00lib_start_link_tuner(struct rt2x00_dev *rt2x00dev) rt2x00lib_reset_link_tuner(rt2x00dev); - queue_delayed_work(rt2x00dev->hw->workqueue, + queue_delayed_work(rt2x00dev->workqueue, &rt2x00dev->link.work, LINK_TUNE_INTERVAL); } @@ -137,14 +137,6 @@ void rt2x00lib_disable_radio(struct rt2x00_dev *rt2x00dev) return; /* - * Stop all scheduled work. - */ - if (work_pending(&rt2x00dev->intf_work)) - cancel_work_sync(&rt2x00dev->intf_work); - if (work_pending(&rt2x00dev->filter_work)) - cancel_work_sync(&rt2x00dev->filter_work); - - /* * Stop the TX queues. */ ieee80211_stop_queues(rt2x00dev->hw); @@ -398,8 +390,8 @@ static void rt2x00lib_link_tuner(struct work_struct *work) * Increase tuner counter, and reschedule the next link tuner run. */ rt2x00dev->link.count++; - queue_delayed_work(rt2x00dev->hw->workqueue, &rt2x00dev->link.work, - LINK_TUNE_INTERVAL); + queue_delayed_work(rt2x00dev->workqueue, + &rt2x00dev->link.work, LINK_TUNE_INTERVAL); } static void rt2x00lib_packetfilter_scheduled(struct work_struct *work) @@ -433,6 +425,15 @@ static void rt2x00lib_intf_scheduled_iter(void *data, u8 *mac, spin_unlock(&intf->lock); + /* + * It is possible the radio was disabled while the work had been + * scheduled. If that happens we should return here immediately, + * note that in the spinlock protected area above the delayed_flags + * have been cleared correctly. + */ + if (!test_bit(DEVICE_ENABLED_RADIO, &rt2x00dev->flags)) + return; + if (delayed_flags & DELAYED_UPDATE_BEACON) { skb = ieee80211_beacon_get(rt2x00dev->hw, vif, &control); if (skb && rt2x00dev->ops->hw->beacon_update(rt2x00dev->hw, @@ -441,7 +442,7 @@ static void rt2x00lib_intf_scheduled_iter(void *data, u8 *mac, } if (delayed_flags & DELAYED_CONFIG_ERP) - rt2x00lib_config_erp(rt2x00dev, intf, &intf->conf); + rt2x00lib_config_erp(rt2x00dev, intf, &conf); if (delayed_flags & DELAYED_LED_ASSOC) rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated); @@ -487,7 +488,7 @@ void rt2x00lib_beacondone(struct rt2x00_dev *rt2x00dev) rt2x00lib_beacondone_iter, rt2x00dev); - queue_work(rt2x00dev->hw->workqueue, &rt2x00dev->intf_work); + queue_work(rt2x00dev->workqueue, &rt2x00dev->intf_work); } EXPORT_SYMBOL_GPL(rt2x00lib_beacondone); @@ -1130,6 +1131,10 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) /* * Initialize configuration work. */ + rt2x00dev->workqueue = create_singlethread_workqueue("rt2x00lib"); + if (!rt2x00dev->workqueue) + goto exit; + INIT_WORK(&rt2x00dev->intf_work, rt2x00lib_intf_scheduled); INIT_WORK(&rt2x00dev->filter_work, rt2x00lib_packetfilter_scheduled); INIT_DELAYED_WORK(&rt2x00dev->link.work, rt2x00lib_link_tuner); @@ -1190,6 +1195,13 @@ void rt2x00lib_remove_dev(struct rt2x00_dev *rt2x00dev) rt2x00leds_unregister(rt2x00dev); /* + * Stop all queued work. Note that most tasks will already be halted + * during rt2x00lib_disable_radio() and rt2x00lib_uninitialize(). + */ + flush_workqueue(rt2x00dev->workqueue); + destroy_workqueue(rt2x00dev->workqueue); + + /* * Free ieee80211_hw memory. */ rt2x00lib_remove_hw(rt2x00dev); diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 87e280a21971..9cb023edd2e9 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -428,7 +428,7 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw, if (!test_bit(DRIVER_REQUIRE_SCHEDULED, &rt2x00dev->flags)) rt2x00dev->ops->lib->config_filter(rt2x00dev, *total_flags); else - queue_work(rt2x00dev->hw->workqueue, &rt2x00dev->filter_work); + queue_work(rt2x00dev->workqueue, &rt2x00dev->filter_work); } EXPORT_SYMBOL_GPL(rt2x00mac_configure_filter); @@ -509,7 +509,7 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, memcpy(&intf->conf, bss_conf, sizeof(*bss_conf)); if (delayed) { intf->delayed_flags |= delayed; - queue_work(rt2x00dev->hw->workqueue, &rt2x00dev->intf_work); + queue_work(rt2x00dev->workqueue, &rt2x00dev->intf_work); } spin_unlock(&intf->lock); } diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index fff8386e816b..83cc0147f698 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -134,11 +134,8 @@ static void rt73usb_bbp_write(struct rt2x00_dev *rt2x00dev, * Wait until the BBP becomes ready. */ reg = rt73usb_bbp_check(rt2x00dev); - if (rt2x00_get_field32(reg, PHY_CSR3_BUSY)) { - ERROR(rt2x00dev, "PHY_CSR3 register busy. Write failed.\n"); - mutex_unlock(&rt2x00dev->usb_cache_mutex); - return; - } + if (rt2x00_get_field32(reg, PHY_CSR3_BUSY)) + goto exit_fail; /* * Write the data into the BBP. @@ -151,6 +148,13 @@ static void rt73usb_bbp_write(struct rt2x00_dev *rt2x00dev, rt73usb_register_write_lock(rt2x00dev, PHY_CSR3, reg); mutex_unlock(&rt2x00dev->usb_cache_mutex); + + return; + +exit_fail: + mutex_unlock(&rt2x00dev->usb_cache_mutex); + + ERROR(rt2x00dev, "PHY_CSR3 register busy. Write failed.\n"); } static void rt73usb_bbp_read(struct rt2x00_dev *rt2x00dev, @@ -164,11 +168,8 @@ static void rt73usb_bbp_read(struct rt2x00_dev *rt2x00dev, * Wait until the BBP becomes ready. */ reg = rt73usb_bbp_check(rt2x00dev); - if (rt2x00_get_field32(reg, PHY_CSR3_BUSY)) { - ERROR(rt2x00dev, "PHY_CSR3 register busy. Read failed.\n"); - mutex_unlock(&rt2x00dev->usb_cache_mutex); - return; - } + if (rt2x00_get_field32(reg, PHY_CSR3_BUSY)) + goto exit_fail; /* * Write the request into the BBP. @@ -184,14 +185,19 @@ static void rt73usb_bbp_read(struct rt2x00_dev *rt2x00dev, * Wait until the BBP becomes ready. */ reg = rt73usb_bbp_check(rt2x00dev); - if (rt2x00_get_field32(reg, PHY_CSR3_BUSY)) { - ERROR(rt2x00dev, "PHY_CSR3 register busy. Read failed.\n"); - *value = 0xff; - return; - } + if (rt2x00_get_field32(reg, PHY_CSR3_BUSY)) + goto exit_fail; *value = rt2x00_get_field32(reg, PHY_CSR3_VALUE); mutex_unlock(&rt2x00dev->usb_cache_mutex); + + return; + +exit_fail: + mutex_unlock(&rt2x00dev->usb_cache_mutex); + + ERROR(rt2x00dev, "PHY_CSR3 register busy. Read failed.\n"); + *value = 0xff; } static void rt73usb_rf_write(struct rt2x00_dev *rt2x00dev, diff --git a/drivers/pci/access.c b/drivers/pci/access.c index ec8f7002b09d..39bb96b413ef 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -178,8 +178,7 @@ static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size, int ret; int begin, end, i; - if (pos < 0 || pos > PCI_VPD_PCI22_SIZE || - size > PCI_VPD_PCI22_SIZE - pos) + if (pos < 0 || pos > vpd->base.len || size > vpd->base.len - pos) return -EINVAL; if (size == 0) return 0; @@ -223,8 +222,8 @@ static int pci_vpd_pci22_write(struct pci_dev *dev, int pos, int size, u32 val; int ret; - if (pos < 0 || pos > PCI_VPD_PCI22_SIZE || pos & 3 || - size > PCI_VPD_PCI22_SIZE - pos || size < 4) + if (pos < 0 || pos > vpd->base.len || pos & 3 || + size > vpd->base.len - pos || size < 4) return -EINVAL; val = (u8) *buf++; @@ -255,11 +254,6 @@ out: return 4; } -static int pci_vpd_pci22_get_size(struct pci_dev *dev) -{ - return PCI_VPD_PCI22_SIZE; -} - static void pci_vpd_pci22_release(struct pci_dev *dev) { kfree(container_of(dev->vpd, struct pci_vpd_pci22, base)); @@ -268,7 +262,6 @@ static void pci_vpd_pci22_release(struct pci_dev *dev) static struct pci_vpd_ops pci_vpd_pci22_ops = { .read = pci_vpd_pci22_read, .write = pci_vpd_pci22_write, - .get_size = pci_vpd_pci22_get_size, .release = pci_vpd_pci22_release, }; @@ -284,6 +277,7 @@ int pci_vpd_pci22_init(struct pci_dev *dev) if (!vpd) return -ENOMEM; + vpd->base.len = PCI_VPD_PCI22_SIZE; vpd->base.ops = &pci_vpd_pci22_ops; spin_lock_init(&vpd->lock); vpd->cap = cap; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 648596d469f6..91156f85a926 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -700,9 +700,10 @@ cleanup_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, cleanup_p2p_bridge, NULL, NULL); - if (!(bridge = acpiphp_handle_to_bridge(handle))) - return AE_OK; - cleanup_bridge(bridge); + bridge = acpiphp_handle_to_bridge(handle); + if (bridge) + cleanup_bridge(bridge); + return AE_OK; } @@ -715,9 +716,19 @@ static void remove_bridge(acpi_handle handle) acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, cleanup_p2p_bridge, NULL, NULL); + /* + * On root bridges with hotplug slots directly underneath (ie, + * no p2p bridge inbetween), we call cleanup_bridge(). + * + * The else clause cleans up root bridges that either had no + * hotplug slots at all, or had a p2p bridge underneath. + */ bridge = acpiphp_handle_to_bridge(handle); if (bridge) cleanup_bridge(bridge); + else + acpi_remove_notify_handler(handle, ACPI_SYSTEM_NOTIFY, + handle_hotplug_event_bridge); } static struct pci_dev * get_apic_pci_info(acpi_handle handle) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 66c0fd21894b..bb0642318a95 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1637,12 +1637,43 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, } #ifdef CONFIG_DMAR_GFX_WA -extern int arch_get_ram_range(int slot, u64 *addr, u64 *size); +struct iommu_prepare_data { + struct pci_dev *pdev; + int ret; +}; + +static int __init iommu_prepare_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + struct iommu_prepare_data *data; + + data = (struct iommu_prepare_data *)datax; + + data->ret = iommu_prepare_identity_map(data->pdev, + start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + return data->ret; + +} + +static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) +{ + int nid; + struct iommu_prepare_data data; + + data.pdev = pdev; + data.ret = 0; + + for_each_online_node(nid) { + work_with_active_regions(nid, iommu_prepare_work_fn, &data); + if (data.ret) + return data.ret; + } + return data.ret; +} + static void __init iommu_prepare_gfx_mapping(void) { struct pci_dev *pdev = NULL; - u64 base, size; - int slot; int ret; for_each_pci_dev(pdev) { @@ -1651,17 +1682,9 @@ static void __init iommu_prepare_gfx_mapping(void) continue; printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", pci_name(pdev)); - slot = arch_get_ram_range(0, &base, &size); - while (slot >= 0) { - ret = iommu_prepare_identity_map(pdev, - base, base + size); - if (ret) - goto error; - slot = arch_get_ram_range(slot, &base, &size); - } - continue; -error: - printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); + ret = iommu_prepare_with_active_regions(pdev); + if (ret) + printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); } } #endif diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 6f3c7446c329..9c718583a237 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -736,9 +736,9 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) attr = kzalloc(sizeof(*attr), GFP_ATOMIC); if (attr) { pdev->vpd->attr = attr; - attr->size = pdev->vpd->ops->get_size(pdev); + attr->size = pdev->vpd->len; attr->attr.name = "vpd"; - attr->attr.mode = S_IRUGO | S_IWUSR; + attr->attr.mode = S_IRUSR | S_IWUSR; attr->read = pci_read_vpd; attr->write = pci_write_vpd; retval = sysfs_create_bin_file(&pdev->dev.kobj, attr); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0a497c1b4227..00408c97e5fc 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -21,11 +21,11 @@ extern int pci_user_write_config_dword(struct pci_dev *dev, int where, u32 val); struct pci_vpd_ops { int (*read)(struct pci_dev *dev, int pos, int size, char *buf); int (*write)(struct pci_dev *dev, int pos, int size, const char *buf); - int (*get_size)(struct pci_dev *dev); void (*release)(struct pci_dev *dev); }; struct pci_vpd { + unsigned int len; struct pci_vpd_ops *ops; struct bin_attribute *attr; /* descriptor for sysfs VPD entry */ }; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dabb563f51d9..338a3f94b4d4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1670,6 +1670,48 @@ static void __devinit quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); +/* + * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the + * VPD end tag will hang the device. This problem was initially + * observed when a vpd entry was created in sysfs + * ('/sys/bus/pci/devices/<id>/vpd'). A read to this sysfs entry + * will dump 32k of data. Reading a full 32k will cause an access + * beyond the VPD end tag causing the device to hang. Once the device + * is hung, the bnx2 driver will not be able to reset the device. + * We believe that it is legal to read beyond the end tag and + * therefore the solution is to limit the read/write length. + */ +static void __devinit quirk_brcm_570x_limit_vpd(struct pci_dev *dev) +{ + /* Only disable the VPD capability for 5706, 5708, and 5709 rev. A */ + if ((dev->device == PCI_DEVICE_ID_NX2_5706) || + (dev->device == PCI_DEVICE_ID_NX2_5708) || + ((dev->device == PCI_DEVICE_ID_NX2_5709) && + (dev->revision & 0xf0) == 0x0)) { + if (dev->vpd) + dev->vpd->len = 0x80; + } +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5706, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5706S, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5708, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5708S, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5709, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5709S, + quirk_brcm_570x_limit_vpd); + #ifdef CONFIG_PCI_MSI /* Some chipsets do not support MSI. We cannot easily rely on setting * PCI_BUS_FLAGS_NO_MSI in its bus flags because there are actually @@ -1685,6 +1727,7 @@ static void __init quirk_disable_all_msi(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_GCNB_LE, quirk_disable_all_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS400_200, quirk_disable_all_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS480, quirk_disable_all_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3336, quirk_disable_all_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3351, quirk_disable_all_msi); /* Disable MSI on chipsets that are known to not support it */ diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 7e3ad4f3b343..58b7336640ff 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -126,12 +126,25 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) int err; struct rtc_time before, now; int first_time = 1; + unsigned long t_now, t_alm; + enum { none, day, month, year } missing = none; + unsigned days; - /* The lower level RTC driver may not be capable of filling - * in all fields of the rtc_time struct (eg. rtc-cmos), - * and so might instead return -1 in some fields. - * We deal with that here by grabbing a current RTC timestamp - * and using values from that for any missing (-1) values. + /* The lower level RTC driver may return -1 in some fields, + * creating invalid alarm->time values, for reasons like: + * + * - The hardware may not be capable of filling them in; + * many alarms match only on time-of-day fields, not + * day/month/year calendar data. + * + * - Some hardware uses illegal values as "wildcard" match + * values, which non-Linux firmware (like a BIOS) may try + * to set up as e.g. "alarm 15 minutes after each hour". + * Linux uses only oneshot alarms. + * + * When we see that here, we deal with it by using values from + * a current RTC timestamp for any missing (-1) values. The + * RTC driver prevents "periodic alarm" modes. * * But this can be racey, because some fields of the RTC timestamp * may have wrapped in the interval since we read the RTC alarm, @@ -174,6 +187,10 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) if (!alarm->enabled) return 0; + /* full-function RTCs won't have such missing fields */ + if (rtc_valid_tm(&alarm->time) == 0) + return 0; + /* get the "after" timestamp, to detect wrapped fields */ err = rtc_read_time(rtc, &now); if (err < 0) @@ -183,22 +200,85 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } while ( before.tm_min != now.tm_min || before.tm_hour != now.tm_hour || before.tm_mon != now.tm_mon - || before.tm_year != now.tm_year - || before.tm_isdst != now.tm_isdst); + || before.tm_year != now.tm_year); - /* Fill in any missing alarm fields using the timestamp */ + /* Fill in the missing alarm fields using the timestamp; we + * know there's at least one since alarm->time is invalid. + */ if (alarm->time.tm_sec == -1) alarm->time.tm_sec = now.tm_sec; if (alarm->time.tm_min == -1) alarm->time.tm_min = now.tm_min; if (alarm->time.tm_hour == -1) alarm->time.tm_hour = now.tm_hour; - if (alarm->time.tm_mday == -1) + + /* For simplicity, only support date rollover for now */ + if (alarm->time.tm_mday == -1) { alarm->time.tm_mday = now.tm_mday; - if (alarm->time.tm_mon == -1) + missing = day; + } + if (alarm->time.tm_mon == -1) { alarm->time.tm_mon = now.tm_mon; - if (alarm->time.tm_year == -1) + if (missing == none) + missing = month; + } + if (alarm->time.tm_year == -1) { alarm->time.tm_year = now.tm_year; + if (missing == none) + missing = year; + } + + /* with luck, no rollover is needed */ + rtc_tm_to_time(&now, &t_now); + rtc_tm_to_time(&alarm->time, &t_alm); + if (t_now < t_alm) + goto done; + + switch (missing) { + + /* 24 hour rollover ... if it's now 10am Monday, an alarm that + * that will trigger at 5am will do so at 5am Tuesday, which + * could also be in the next month or year. This is a common + * case, especially for PCs. + */ + case day: + dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day"); + t_alm += 24 * 60 * 60; + rtc_time_to_tm(t_alm, &alarm->time); + break; + + /* Month rollover ... if it's the 31th, an alarm on the 3rd will + * be next month. An alarm matching on the 30th, 29th, or 28th + * may end up in the month after that! Many newer PCs support + * this type of alarm. + */ + case month: + dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month"); + do { + if (alarm->time.tm_mon < 11) + alarm->time.tm_mon++; + else { + alarm->time.tm_mon = 0; + alarm->time.tm_year++; + } + days = rtc_month_days(alarm->time.tm_mon, + alarm->time.tm_year); + } while (days < alarm->time.tm_mday); + break; + + /* Year rollover ... easy except for leap years! */ + case year: + dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year"); + do { + alarm->time.tm_year++; + } while (!rtc_valid_tm(&alarm->time)); + break; + + default: + dev_warn(&rtc->dev, "alarm rollover not handled\n"); + } + +done: return 0; } EXPORT_SYMBOL_GPL(rtc_read_alarm); diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 82f62d25f921..67421b0d3a7b 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -331,14 +331,14 @@ static int sa1100_rtc_probe(struct platform_device *pdev) RCNR = 0; } + device_init_wakeup(&pdev->dev, 1); + rtc = rtc_device_register(pdev->name, &pdev->dev, &sa1100_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) return PTR_ERR(rtc); - device_init_wakeup(&pdev->dev, 1); - platform_set_drvdata(pdev, rtc); return 0; diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index eaf55945f21b..7dcfba1bbfe1 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -71,6 +71,7 @@ #define X1205_SR_RTCF 0x01 /* Clock failure */ #define X1205_SR_WEL 0x02 /* Write Enable Latch */ #define X1205_SR_RWEL 0x04 /* Register Write Enable */ +#define X1205_SR_AL0 0x20 /* Alarm 0 match */ #define X1205_DTR_DTR0 0x01 #define X1205_DTR_DTR1 0x02 @@ -78,6 +79,8 @@ #define X1205_HR_MIL 0x80 /* Set in ccr.hour for 24 hr mode */ +#define X1205_INT_AL0E 0x20 /* Alarm 0 enable */ + static struct i2c_driver x1205_driver; /* @@ -89,8 +92,8 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm, unsigned char reg_base) { unsigned char dt_addr[2] = { 0, reg_base }; - unsigned char buf[8]; + int i; struct i2c_msg msgs[] = { { client->addr, 0, 2, dt_addr }, /* setup read ptr */ @@ -98,7 +101,7 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm, }; /* read date registers */ - if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) { + if (i2c_transfer(client->adapter, &msgs[0], 2) != 2) { dev_err(&client->dev, "%s: read error\n", __func__); return -EIO; } @@ -110,6 +113,11 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + /* Mask out the enable bits if these are alarm registers */ + if (reg_base < X1205_CCR_BASE) + for (i = 0; i <= 4; i++) + buf[i] &= 0x7F; + tm->tm_sec = BCD2BIN(buf[CCR_SEC]); tm->tm_min = BCD2BIN(buf[CCR_MIN]); tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */ @@ -138,7 +146,7 @@ static int x1205_get_status(struct i2c_client *client, unsigned char *sr) }; /* read status register */ - if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) { + if (i2c_transfer(client->adapter, &msgs[0], 2) != 2) { dev_err(&client->dev, "%s: read error\n", __func__); return -EIO; } @@ -147,10 +155,11 @@ static int x1205_get_status(struct i2c_client *client, unsigned char *sr) } static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm, - int datetoo, u8 reg_base) + int datetoo, u8 reg_base, unsigned char alm_enable) { - int i, xfer; + int i, xfer, nbytes; unsigned char buf[8]; + unsigned char rdata[10] = { 0, reg_base }; static const unsigned char wel[3] = { 0, X1205_REG_SR, X1205_SR_WEL }; @@ -189,6 +198,11 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm, buf[CCR_Y2K] = BIN2BCD(tm->tm_year / 100); } + /* If writing alarm registers, set compare bits on registers 0-4 */ + if (reg_base < X1205_CCR_BASE) + for (i = 0; i <= 4; i++) + buf[i] |= 0x80; + /* this sequence is required to unlock the chip */ if ((xfer = i2c_master_send(client, wel, 3)) != 3) { dev_err(&client->dev, "%s: wel - %d\n", __func__, xfer); @@ -200,19 +214,57 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm, return -EIO; } + /* write register's data */ - for (i = 0; i < (datetoo ? 8 : 3); i++) { - unsigned char rdata[3] = { 0, reg_base + i, buf[i] }; + if (datetoo) + nbytes = 8; + else + nbytes = 3; + for (i = 0; i < nbytes; i++) + rdata[2+i] = buf[i]; + + xfer = i2c_master_send(client, rdata, nbytes+2); + if (xfer != nbytes+2) { + dev_err(&client->dev, + "%s: result=%d addr=%02x, data=%02x\n", + __func__, + xfer, rdata[1], rdata[2]); + return -EIO; + } + + /* If we wrote to the nonvolatile region, wait 10msec for write cycle*/ + if (reg_base < X1205_CCR_BASE) { + unsigned char al0e[3] = { 0, X1205_REG_INT, 0 }; + + msleep(10); - xfer = i2c_master_send(client, rdata, 3); + /* ...and set or clear the AL0E bit in the INT register */ + + /* Need to set RWEL again as the write has cleared it */ + xfer = i2c_master_send(client, rwel, 3); if (xfer != 3) { dev_err(&client->dev, - "%s: xfer=%d addr=%02x, data=%02x\n", + "%s: aloe rwel - %d\n", __func__, - xfer, rdata[1], rdata[2]); + xfer); + return -EIO; + } + + if (alm_enable) + al0e[2] = X1205_INT_AL0E; + + xfer = i2c_master_send(client, al0e, 3); + if (xfer != 3) { + dev_err(&client->dev, + "%s: al0e - %d\n", + __func__, + xfer); return -EIO; } - }; + + /* and wait 10msec again for this write to complete */ + msleep(10); + } /* disable further writes */ if ((xfer = i2c_master_send(client, diswe, 3)) != 3) { @@ -230,9 +282,9 @@ static int x1205_fix_osc(struct i2c_client *client) tm.tm_hour = tm.tm_min = tm.tm_sec = 0; - if ((err = x1205_set_datetime(client, &tm, 0, X1205_CCR_BASE)) < 0) - dev_err(&client->dev, - "unable to restart the oscillator\n"); + err = x1205_set_datetime(client, &tm, 0, X1205_CCR_BASE, 0); + if (err < 0) + dev_err(&client->dev, "unable to restart the oscillator\n"); return err; } @@ -248,7 +300,7 @@ static int x1205_get_dtrim(struct i2c_client *client, int *trim) }; /* read dtr register */ - if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) { + if (i2c_transfer(client->adapter, &msgs[0], 2) != 2) { dev_err(&client->dev, "%s: read error\n", __func__); return -EIO; } @@ -280,7 +332,7 @@ static int x1205_get_atrim(struct i2c_client *client, int *trim) }; /* read atr register */ - if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) { + if (i2c_transfer(client->adapter, &msgs[0], 2) != 2) { dev_err(&client->dev, "%s: read error\n", __func__); return -EIO; } @@ -403,14 +455,33 @@ static int x1205_validate_client(struct i2c_client *client) static int x1205_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { - return x1205_get_datetime(to_i2c_client(dev), - &alrm->time, X1205_ALM0_BASE); + int err; + unsigned char intreg, status; + static unsigned char int_addr[2] = { 0, X1205_REG_INT }; + struct i2c_client *client = to_i2c_client(dev); + struct i2c_msg msgs[] = { + { client->addr, 0, 2, int_addr }, /* setup read ptr */ + { client->addr, I2C_M_RD, 1, &intreg }, /* read INT register */ + }; + + /* read interrupt register and status register */ + if (i2c_transfer(client->adapter, &msgs[0], 2) != 2) { + dev_err(&client->dev, "%s: read error\n", __func__); + return -EIO; + } + err = x1205_get_status(client, &status); + if (err == 0) { + alrm->pending = (status & X1205_SR_AL0) ? 1 : 0; + alrm->enabled = (intreg & X1205_INT_AL0E) ? 1 : 0; + err = x1205_get_datetime(client, &alrm->time, X1205_ALM0_BASE); + } + return err; } static int x1205_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) { return x1205_set_datetime(to_i2c_client(dev), - &alrm->time, 1, X1205_ALM0_BASE); + &alrm->time, 1, X1205_ALM0_BASE, alrm->enabled); } static int x1205_rtc_read_time(struct device *dev, struct rtc_time *tm) @@ -422,7 +493,7 @@ static int x1205_rtc_read_time(struct device *dev, struct rtc_time *tm) static int x1205_rtc_set_time(struct device *dev, struct rtc_time *tm) { return x1205_set_datetime(to_i2c_client(dev), - tm, 1, X1205_CCR_BASE); + tm, 1, X1205_CCR_BASE, 0); } static int x1205_rtc_proc(struct device *dev, struct seq_file *seq) diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index a0b6d414953d..59fbef08d690 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -2359,6 +2359,24 @@ void scsi_esp_unregister(struct esp *esp) } EXPORT_SYMBOL(scsi_esp_unregister); +static int esp_target_alloc(struct scsi_target *starget) +{ + struct esp *esp = shost_priv(dev_to_shost(&starget->dev)); + struct esp_target_data *tp = &esp->target[starget->id]; + + tp->starget = starget; + + return 0; +} + +static void esp_target_destroy(struct scsi_target *starget) +{ + struct esp *esp = shost_priv(dev_to_shost(&starget->dev)); + struct esp_target_data *tp = &esp->target[starget->id]; + + tp->starget = NULL; +} + static int esp_slave_alloc(struct scsi_device *dev) { struct esp *esp = shost_priv(dev->host); @@ -2370,8 +2388,6 @@ static int esp_slave_alloc(struct scsi_device *dev) return -ENOMEM; dev->hostdata = lp; - tp->starget = dev->sdev_target; - spi_min_period(tp->starget) = esp->min_period; spi_max_offset(tp->starget) = 15; @@ -2608,6 +2624,8 @@ struct scsi_host_template scsi_esp_template = { .name = "esp", .info = esp_info, .queuecommand = esp_queuecommand, + .target_alloc = esp_target_alloc, + .target_destroy = esp_target_destroy, .slave_alloc = esp_slave_alloc, .slave_configure = esp_slave_configure, .slave_destroy = esp_slave_destroy, diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 45df83b9d847..0fe031f003e7 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -61,7 +61,7 @@ static int ses_probe(struct device *dev) return err; } -#define SES_TIMEOUT 30 +#define SES_TIMEOUT (30 * HZ) #define SES_RETRIES 3 static int ses_recv_diag(struct scsi_device *sdev, int page_code, diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index c9b64e73c987..42d2e108b679 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -1991,7 +1991,9 @@ struct uart_match { static int serial_match_port(struct device *dev, void *data) { struct uart_match *match = data; - dev_t devt = MKDEV(match->driver->major, match->driver->minor) + match->port->line; + struct tty_driver *tty_drv = match->driver->tty_driver; + dev_t devt = MKDEV(tty_drv->major, tty_drv->minor_start) + + match->port->line; return dev->devt == devt; /* Actually, only one tty per port */ } diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 799337f7fde1..f5b60c70389b 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -167,14 +167,14 @@ spidev_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) mutex_lock(&spidev->buf_lock); status = spidev_sync_read(spidev, count); - if (status == 0) { + if (status > 0) { unsigned long missing; - missing = copy_to_user(buf, spidev->buffer, count); - if (count && missing == count) + missing = copy_to_user(buf, spidev->buffer, status); + if (missing == status) status = -EFAULT; else - status = count - missing; + status = status - missing; } mutex_unlock(&spidev->buf_lock); @@ -200,8 +200,6 @@ spidev_write(struct file *filp, const char __user *buf, missing = copy_from_user(spidev->buffer, buf, count); if (missing == 0) { status = spidev_sync_write(spidev, count); - if (status == 0) - status = count; } else status = -EFAULT; mutex_unlock(&spidev->buf_lock); diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 4b628526df09..a86e952ed4ca 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -12,3 +12,12 @@ menuconfig THERMAL cooling devices. All platforms with ACPI thermal support can use this driver. If you want this support, you should say Y or M here. + +config THERMAL_HWMON + bool "Hardware monitoring support" + depends on HWMON=y || HWMON=THERMAL + help + The generic thermal sysfs driver's hardware monitoring support + requires a 2.10.7/3.0.2 or later lm-sensors userspace. + + Say Y if your user-space is new enough. diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 6098787341f3..fe07462d5947 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -295,8 +295,8 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* Device management */ -#if defined(CONFIG_HWMON) || \ - (defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE)) +#if defined(CONFIG_THERMAL_HWMON) + /* hwmon sys I/F */ #include <linux/hwmon.h> static LIST_HEAD(thermal_hwmon_list); diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 63c34043b4d9..c3201affa0b6 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1125,9 +1125,6 @@ static void stop_data_traffic(struct acm *acm) for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->ru[i].urb); - INIT_LIST_HEAD(&acm->filled_read_bufs); - INIT_LIST_HEAD(&acm->spare_read_bufs); - tasklet_enable(&acm->urb_task); cancel_work_sync(&acm->work); diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 09a53e7f3327..42a436478b78 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -924,6 +924,15 @@ static int register_root_hub(struct usb_hcd *hcd) return retval; } +void usb_enable_root_hub_irq (struct usb_bus *bus) +{ + struct usb_hcd *hcd; + + hcd = container_of (bus, struct usb_hcd, self); + if (hcd->driver->hub_irq_enable && hcd->state != HC_STATE_HALT) + hcd->driver->hub_irq_enable (hcd); +} + /*-------------------------------------------------------------------------*/ @@ -1684,19 +1693,30 @@ EXPORT_SYMBOL_GPL(usb_bus_start_enum); irqreturn_t usb_hcd_irq (int irq, void *__hcd) { struct usb_hcd *hcd = __hcd; - int start = hcd->state; + unsigned long flags; + irqreturn_t rc; - if (unlikely(start == HC_STATE_HALT || - !test_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags))) - return IRQ_NONE; - if (hcd->driver->irq (hcd) == IRQ_NONE) - return IRQ_NONE; + /* IRQF_DISABLED doesn't work correctly with shared IRQs + * when the first handler doesn't use it. So let's just + * assume it's never used. + */ + local_irq_save(flags); - set_bit(HCD_FLAG_SAW_IRQ, &hcd->flags); + if (unlikely(hcd->state == HC_STATE_HALT || + !test_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags))) { + rc = IRQ_NONE; + } else if (hcd->driver->irq(hcd) == IRQ_NONE) { + rc = IRQ_NONE; + } else { + set_bit(HCD_FLAG_SAW_IRQ, &hcd->flags); + + if (unlikely(hcd->state == HC_STATE_HALT)) + usb_hc_died(hcd); + rc = IRQ_HANDLED; + } - if (unlikely(hcd->state == HC_STATE_HALT)) - usb_hc_died (hcd); - return IRQ_HANDLED; + local_irq_restore(flags); + return rc; } /*-------------------------------------------------------------------------*/ @@ -1860,6 +1880,13 @@ int usb_add_hcd(struct usb_hcd *hcd, /* enable irqs just before we start the controller */ if (hcd->driver->irq) { + + /* IRQF_DISABLED doesn't work as advertised when used together + * with IRQF_SHARED. As usb_hcd_irq() will always disable + * interrupts we can remove it here. + */ + irqflags &= ~IRQF_DISABLED; + snprintf(hcd->irq_descr, sizeof(hcd->irq_descr), "%s:usb%d", hcd->driver->description, hcd->self.busnum); if ((retval = request_irq(irqnum, &usb_hcd_irq, irqflags, diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index a0bf5df6cb6f..b9de1569b39e 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -210,6 +210,8 @@ struct hc_driver { int (*bus_suspend)(struct usb_hcd *); int (*bus_resume)(struct usb_hcd *); int (*start_port_reset)(struct usb_hcd *, unsigned port_num); + void (*hub_irq_enable)(struct usb_hcd *); + /* Needed only if port-change IRQs are level-triggered */ /* force handover of high-speed port to full-speed companion */ void (*relinquish_port)(struct usb_hcd *, int); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 94789be54ca3..4cfe32a16c37 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -713,18 +713,11 @@ static void hub_restart(struct usb_hub *hub, int type) } /* Was the power session lost while we were suspended? */ - switch (type) { - case HUB_RESET_RESUME: - portstatus = 0; - portchange = USB_PORT_STAT_C_CONNECTION; - break; + status = hub_port_status(hub, port1, &portstatus, &portchange); - case HUB_RESET: - case HUB_RESUME: - status = hub_port_status(hub, port1, - &portstatus, &portchange); - break; - } + /* If the device is gone, khubd will handle it later */ + if (status == 0 && !(portstatus & USB_PORT_STAT_CONNECTION)) + continue; /* For "USB_PERSIST"-enabled children we must * mark the child device for reset-resume and @@ -2080,6 +2073,8 @@ int usb_port_resume(struct usb_device *udev) } clear_bit(port1, hub->busy_bits); + if (!hub->hdev->parent && !hub->busy_bits[0]) + usb_enable_root_hub_irq(hub->hdev->bus); if (status == 0) status = finish_port_resume(udev); @@ -3009,6 +3004,11 @@ static void hub_events(void) hub->activating = 0; + /* If this is a root hub, tell the HCD it's okay to + * re-enable port-change interrupts now. */ + if (!hdev->parent && !hub->busy_bits[0]) + usb_enable_root_hub_irq(hdev->bus); + loop_autopm: /* Allow autosuspend if we're not going to run again */ if (list_empty(&hub->event_list)) @@ -3234,6 +3234,8 @@ int usb_reset_device(struct usb_device *udev) break; } clear_bit(port1, parent_hub->busy_bits); + if (!parent_hdev->parent && !parent_hub->busy_bits[0]) + usb_enable_root_hub_irq(parent_hdev->bus); if (ret < 0) goto re_enumerate; diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 35a03095757e..90245fd8bac4 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -177,6 +177,15 @@ timer_action_done (struct ehci_hcd *ehci, enum ehci_timer_action action) static inline void timer_action (struct ehci_hcd *ehci, enum ehci_timer_action action) { + /* Don't override timeouts which shrink or (later) disable + * the async ring; just the I/O watchdog. Note that if a + * SHRINK were pending, OFF would never be requested. + */ + if (timer_pending(&ehci->watchdog) + && ((BIT(TIMER_ASYNC_SHRINK) | BIT(TIMER_ASYNC_OFF)) + & ehci->actions)) + return; + if (!test_and_set_bit (action, &ehci->actions)) { unsigned long t; @@ -192,15 +201,7 @@ timer_action (struct ehci_hcd *ehci, enum ehci_timer_action action) t = EHCI_SHRINK_JIFFIES; break; } - t += jiffies; - // all timings except IAA watchdog can be overridden. - // async queue SHRINK often precedes IAA. while it's ready - // to go OFF neither can matter, and afterwards the IO - // watchdog stops unless there's still periodic traffic. - if (time_before_eq(t, ehci->watchdog.expires) - && timer_pending (&ehci->watchdog)) - return; - mod_timer (&ehci->watchdog, t); + mod_timer(&ehci->watchdog, t + jiffies); } } diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index c96db1153dcf..e534f9de0f05 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -261,6 +261,7 @@ static const struct hc_driver ohci_at91_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c index 1b9abdba920b..f90fe0c7373f 100644 --- a/drivers/usb/host/ohci-au1xxx.c +++ b/drivers/usb/host/ohci-au1xxx.c @@ -288,6 +288,7 @@ static const struct hc_driver ohci_au1xxx_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-ep93xx.c b/drivers/usb/host/ohci-ep93xx.c index 06aadfb0ec29..5adaf36e47d0 100644 --- a/drivers/usb/host/ohci-ep93xx.c +++ b/drivers/usb/host/ohci-ep93xx.c @@ -135,6 +135,7 @@ static struct hc_driver ohci_ep93xx_hc_driver = { .get_frame_number = ohci_get_frame, .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 33f1c1c32edf..a8160d65f32b 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -1054,7 +1054,7 @@ MODULE_LICENSE ("GPL"); #ifdef CONFIG_MFD_SM501 #include "ohci-sm501.c" -#define PLATFORM_DRIVER ohci_hcd_sm501_driver +#define SM501_OHCI_DRIVER ohci_hcd_sm501_driver #endif #if !defined(PCI_DRIVER) && \ @@ -1062,6 +1062,7 @@ MODULE_LICENSE ("GPL"); !defined(OF_PLATFORM_DRIVER) && \ !defined(SA1111_DRIVER) && \ !defined(PS3_SYSTEM_BUS_DRIVER) && \ + !defined(SM501_OHCI_DRIVER) && \ !defined(SSB_OHCI_DRIVER) #error "missing bus glue for ohci-hcd" #endif @@ -1121,9 +1122,18 @@ static int __init ohci_hcd_mod_init(void) goto error_ssb; #endif +#ifdef SM501_OHCI_DRIVER + retval = platform_driver_register(&SM501_OHCI_DRIVER); + if (retval < 0) + goto error_sm501; +#endif + return retval; /* Error path */ +#ifdef SM501_OHCI_DRIVER + error_sm501: +#endif #ifdef SSB_OHCI_DRIVER error_ssb: #endif @@ -1159,6 +1169,9 @@ module_init(ohci_hcd_mod_init); static void __exit ohci_hcd_mod_exit(void) { +#ifdef SM501_OHCI_DRIVER + platform_driver_unregister(&SM501_OHCI_DRIVER); +#endif #ifdef SSB_OHCI_DRIVER ssb_driver_unregister(&SSB_OHCI_DRIVER); #endif diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 79a78029f896..b56739221d11 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -36,6 +36,18 @@ /*-------------------------------------------------------------------------*/ +/* hcd->hub_irq_enable() */ +static void ohci_rhsc_enable (struct usb_hcd *hcd) +{ + struct ohci_hcd *ohci = hcd_to_ohci (hcd); + + spin_lock_irq(&ohci->lock); + if (!ohci->autostop) + del_timer(&hcd->rh_timer); /* Prevent next poll */ + ohci_writel(ohci, OHCI_INTR_RHSC, &ohci->regs->intrenable); + spin_unlock_irq(&ohci->lock); +} + #define OHCI_SCHED_ENABLES \ (OHCI_CTRL_CLE|OHCI_CTRL_BLE|OHCI_CTRL_PLE|OHCI_CTRL_IE) @@ -362,28 +374,18 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed, int any_connected) { int poll_rh = 1; - int rhsc; - rhsc = ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC; switch (ohci->hc_control & OHCI_CTRL_HCFS) { case OHCI_USB_OPER: - /* If no status changes are pending, enable status-change - * interrupts. - */ - if (!rhsc && !changed) { - rhsc = OHCI_INTR_RHSC; - ohci_writel(ohci, rhsc, &ohci->regs->intrenable); - } - - /* Keep on polling until we know a device is connected - * and RHSC is enabled, or until we autostop. - */ + /* keep on polling until we know a device is connected + * and RHSC is enabled */ if (!ohci->autostop) { if (any_connected || !device_may_wakeup(&ohci_to_hcd(ohci) ->self.root_hub->dev)) { - if (rhsc) + if (ohci_readl(ohci, &ohci->regs->intrenable) & + OHCI_INTR_RHSC) poll_rh = 0; } else { ohci->autostop = 1; @@ -396,13 +398,12 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed, ohci->autostop = 0; ohci->next_statechange = jiffies + STATECHANGE_DELAY; - } else if (rhsc && time_after_eq(jiffies, + } else if (time_after_eq(jiffies, ohci->next_statechange) && !ohci->ed_rm_list && !(ohci->hc_control & OHCI_SCHED_ENABLES)) { ohci_rh_suspend(ohci, 1); - poll_rh = 0; } } break; @@ -416,12 +417,6 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed, else usb_hcd_resume_root_hub(ohci_to_hcd(ohci)); } else { - if (!rhsc && (ohci->autostop || - ohci_to_hcd(ohci)->self.root_hub-> - do_remote_wakeup)) - ohci_writel(ohci, OHCI_INTR_RHSC, - &ohci->regs->intrenable); - /* everything is idle, no need for polling */ poll_rh = 0; } @@ -443,16 +438,12 @@ static inline int ohci_rh_resume(struct ohci_hcd *ohci) static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed, int any_connected) { - /* If RHSC is enabled, don't poll */ - if (ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC) - return 0; + int poll_rh = 1; - /* If no status changes are pending, enable status-change interrupts */ - if (!changed) { - ohci_writel(ohci, OHCI_INTR_RHSC, &ohci->regs->intrenable); - return 0; - } - return 1; + /* keep on polling until RHSC is enabled */ + if (ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC) + poll_rh = 0; + return poll_rh; } #endif /* CONFIG_PM */ diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c index 96d14fa1d833..13c12ed22252 100644 --- a/drivers/usb/host/ohci-lh7a404.c +++ b/drivers/usb/host/ohci-lh7a404.c @@ -193,6 +193,7 @@ static const struct hc_driver ohci_lh7a404_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c index 6859fb5f1d6f..3a7c24c03671 100644 --- a/drivers/usb/host/ohci-omap.c +++ b/drivers/usb/host/ohci-omap.c @@ -466,6 +466,7 @@ static const struct hc_driver ohci_omap_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 3bf175d95a23..4696cc912e16 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -327,6 +327,7 @@ static const struct hc_driver ohci_pci_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-pnx4008.c b/drivers/usb/host/ohci-pnx4008.c index 664f07ee8732..28b458f20cc3 100644 --- a/drivers/usb/host/ohci-pnx4008.c +++ b/drivers/usb/host/ohci-pnx4008.c @@ -280,6 +280,7 @@ static const struct hc_driver ohci_pnx4008_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-pnx8550.c b/drivers/usb/host/ohci-pnx8550.c index 28467e288a93..605d59cba28e 100644 --- a/drivers/usb/host/ohci-pnx8550.c +++ b/drivers/usb/host/ohci-pnx8550.c @@ -201,6 +201,7 @@ static const struct hc_driver ohci_pnx8550_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-ppc-of.c b/drivers/usb/host/ohci-ppc-of.c index 50e55db13636..a67252791223 100644 --- a/drivers/usb/host/ohci-ppc-of.c +++ b/drivers/usb/host/ohci-ppc-of.c @@ -72,6 +72,7 @@ static const struct hc_driver ohci_ppc_of_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-ppc-soc.c b/drivers/usb/host/ohci-ppc-soc.c index cd3398b675b2..523c30125577 100644 --- a/drivers/usb/host/ohci-ppc-soc.c +++ b/drivers/usb/host/ohci-ppc-soc.c @@ -172,6 +172,7 @@ static const struct hc_driver ohci_ppc_soc_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c index bfdeb0d22d05..c1935ae537f8 100644 --- a/drivers/usb/host/ohci-ps3.c +++ b/drivers/usb/host/ohci-ps3.c @@ -68,6 +68,7 @@ static const struct hc_driver ps3_ohci_hc_driver = { .get_frame_number = ohci_get_frame, .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, .start_port_reset = ohci_start_port_reset, #if defined(CONFIG_PM) .bus_suspend = ohci_bus_suspend, diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c index 70b0d4b459e7..d4ee27d92be8 100644 --- a/drivers/usb/host/ohci-pxa27x.c +++ b/drivers/usb/host/ohci-pxa27x.c @@ -298,6 +298,7 @@ static const struct hc_driver ohci_pxa27x_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index 9c9f3b59186f..9b547407c934 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -952,6 +952,7 @@ rescan_this: struct urb *urb; urb_priv_t *urb_priv; __hc32 savebits; + u32 tdINFO; td = list_entry (entry, struct td, td_list); urb = td->urb; @@ -966,6 +967,17 @@ rescan_this: savebits = *prev & ~cpu_to_hc32 (ohci, TD_MASK); *prev = td->hwNextTD | savebits; + /* If this was unlinked, the TD may not have been + * retired ... so manually save the data toggle. + * The controller ignores the value we save for + * control and ISO endpoints. + */ + tdINFO = hc32_to_cpup(ohci, &td->hwINFO); + if ((tdINFO & TD_T) == TD_T_DATA0) + ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_C); + else if ((tdINFO & TD_T) == TD_T_DATA1) + ed->hwHeadP |= cpu_to_hc32(ohci, ED_C); + /* HC may have partly processed this TD */ td_done (ohci, urb, td); urb_priv->td_cnt++; diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c index a73d2ff322e2..ead4772f0f27 100644 --- a/drivers/usb/host/ohci-s3c2410.c +++ b/drivers/usb/host/ohci-s3c2410.c @@ -466,6 +466,7 @@ static const struct hc_driver ohci_s3c2410_hc_driver = { */ .hub_status_data = ohci_s3c2410_hub_status_data, .hub_control = ohci_s3c2410_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c index 99438c65981b..0f48f2d99226 100644 --- a/drivers/usb/host/ohci-sa1111.c +++ b/drivers/usb/host/ohci-sa1111.c @@ -231,6 +231,7 @@ static const struct hc_driver ohci_sa1111_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-sh.c b/drivers/usb/host/ohci-sh.c index 60f03cc7ec4f..e7ee607278fe 100644 --- a/drivers/usb/host/ohci-sh.c +++ b/drivers/usb/host/ohci-sh.c @@ -68,6 +68,7 @@ static const struct hc_driver ohci_sh_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c index e899a77dfb83..e610698c6b60 100644 --- a/drivers/usb/host/ohci-sm501.c +++ b/drivers/usb/host/ohci-sm501.c @@ -75,6 +75,7 @@ static const struct hc_driver ohci_sm501_hc_driver = { */ .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/ohci-ssb.c b/drivers/usb/host/ohci-ssb.c index c4265caec780..7275186db315 100644 --- a/drivers/usb/host/ohci-ssb.c +++ b/drivers/usb/host/ohci-ssb.c @@ -81,6 +81,7 @@ static const struct hc_driver ssb_ohci_hc_driver = { .hub_status_data = ohci_hub_status_data, .hub_control = ohci_hub_control, + .hub_irq_enable = ohci_rhsc_enable, #ifdef CONFIG_PM .bus_suspend = ohci_bus_suspend, .bus_resume = ohci_bus_resume, diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c index f29307405bb3..9b6323f768b2 100644 --- a/drivers/usb/host/u132-hcd.c +++ b/drivers/usb/host/u132-hcd.c @@ -2934,6 +2934,16 @@ static int u132_start_port_reset(struct usb_hcd *hcd, unsigned port_num) return 0; } +static void u132_hub_irq_enable(struct usb_hcd *hcd) +{ + struct u132 *u132 = hcd_to_u132(hcd); + if (u132->going > 1) { + dev_err(&u132->platform_dev->dev, "device has been removed %d\n" + , u132->going); + } else if (u132->going > 0) + dev_err(&u132->platform_dev->dev, "device is being removed\n"); +} + #ifdef CONFIG_PM static int u132_bus_suspend(struct usb_hcd *hcd) @@ -2985,6 +2995,7 @@ static struct hc_driver u132_hc_driver = { .bus_suspend = u132_bus_suspend, .bus_resume = u132_bus_resume, .start_port_reset = u132_start_port_reset, + .hub_irq_enable = u132_hub_irq_enable, }; /* diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index cb7fa0eaf3ae..33182f4c2267 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -3264,8 +3264,6 @@ static void sisusb_disconnect(struct usb_interface *intf) /* decrement our usage count */ kref_put(&sisusb->kref, sisusb_delete); - - dev_info(&sisusb->sisusb_dev->dev, "Disconnected\n"); } static struct usb_device_id sisusb_table [] = { diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 5234e7a3bd2c..0ff4a3971e45 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -637,6 +637,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) }, + { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) }, { }, /* Optional parameter entry */ { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h index 06e0ecabb3eb..8302eca893ea 100644 --- a/drivers/usb/serial/ftdi_sio.h +++ b/drivers/usb/serial/ftdi_sio.h @@ -828,6 +828,9 @@ /* Propox devices */ #define FTDI_PROPOX_JTAGCABLEII_PID 0xD738 +/* Rig Expert Ukraine devices */ +#define FTDI_REU_TINY_PID 0xED22 /* RigExpert Tiny */ + /* Commands */ #define FTDI_SIO_RESET 0 /* Reset the port */ #define FTDI_SIO_MODEM_CTRL 1 /* Set the modem control register */ diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index ea924dc48496..d9fb3768a2d7 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -570,7 +570,12 @@ static struct usb_serial_driver ipaq_device = { .description = "PocketPC PDA", .usb_driver = &ipaq_driver, .id_table = ipaq_id_table, - .num_ports = 2, + /* + * some devices have an extra endpoint, which + * must be ignored as it would make the core + * create a second port which oopses when used + */ + .num_ports = 1, .open = ipaq_open, .close = ipaq_close, .attach = ipaq_startup, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 43cfde83a93b..a73420dd052a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -306,6 +306,7 @@ static struct usb_device_id option_ids[] = { { USB_DEVICE(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_C100_1) }, { USB_DEVICE(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_C100_2) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(MAXON_VENDOR_ID, 0x6280) }, /* BP3-USB & BP3-EXT HSDPA */ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864E) }, diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 103195abd417..2a0dd1b50dc4 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -57,6 +57,7 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_PHAROS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ALDIGA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MMX) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GPRS) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index cff160abb130..6ac3bbcf7a22 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -15,6 +15,7 @@ #define PL2303_PRODUCT_ID_RSAQ3 0xaaa2 #define PL2303_PRODUCT_ID_ALDIGA 0x0611 #define PL2303_PRODUCT_ID_MMX 0x0612 +#define PL2303_PRODUCT_ID_GPRS 0x0609 #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 45fe3663fa7f..39a7c11795c4 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -402,11 +402,19 @@ UNUSUAL_DEV( 0x04a5, 0x3010, 0x0100, 0x0100, US_FL_IGNORE_RESIDUE ), #ifdef CONFIG_USB_STORAGE_CYPRESS_ATACB +/* CY7C68300 : support atacb */ UNUSUAL_DEV( 0x04b4, 0x6830, 0x0000, 0x9999, "Cypress", "Cypress AT2LP", US_SC_CYP_ATACB, US_PR_DEVICE, NULL, 0), + +/* CY7C68310 : support atacb and atacb2 */ +UNUSUAL_DEV( 0x04b4, 0x6831, 0x0000, 0x9999, + "Cypress", + "Cypress ISD-300LP", + US_SC_CYP_ATACB, US_PR_DEVICE, NULL, + 0), #endif /* Reported by Simon Levitt <[email protected]> diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 002b61b4f0f6..e0c5f96b273d 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1825,12 +1825,13 @@ config FB_FSL_DIU config FB_W100 tristate "W100 frame buffer support" - depends on FB && PXA_SHARPSL + depends on FB && ARCH_PXA select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT ---help--- Frame buffer driver for the w100 as found on the Sharp SL-Cxx series. + It can also drive the w3220 chip found on iPAQ hx4700. This driver is also available as a module ( = code which can be inserted and removed from the running kernel whenever you want). The diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c index 0a2785361ca3..712dabc6269f 100644 --- a/drivers/video/fsl-diu-fb.c +++ b/drivers/video/fsl-diu-fb.c @@ -286,7 +286,7 @@ static struct diu_pool pool; * rheap and make the furture large allocation fail. */ -void *fsl_diu_alloc(unsigned long size, phys_addr_t *phys) +static void *fsl_diu_alloc(unsigned long size, phys_addr_t *phys) { void *virt; @@ -311,12 +311,12 @@ void *fsl_diu_alloc(unsigned long size, phys_addr_t *phys) memset(virt, 0, size); } - pr_debug("rh virt=%p phys=%lx\n", virt, *phys); + pr_debug("rh virt=%p phys=%llx\n", virt, (unsigned long long)*phys); return virt; } -void fsl_diu_free(void *p, unsigned long size) +static void fsl_diu_free(void *p, unsigned long size) { pr_debug("p=%p size=%lu\n", p, size); @@ -770,7 +770,7 @@ static int map_video_memory(struct fb_info *info) info->fix.smem_len = info->fix.line_length * info->var.yres_virtual; pr_debug("MAP_VIDEO_MEMORY: smem_len = %d\n", info->fix.smem_len); info->screen_base = fsl_diu_alloc(info->fix.smem_len, &phys); - if (info->screen_base == 0) { + if (info->screen_base == NULL) { printk(KERN_ERR "Unable to allocate fb memory\n"); return -ENOMEM; } @@ -788,7 +788,7 @@ static int map_video_memory(struct fb_info *info) static void unmap_video_memory(struct fb_info *info) { fsl_diu_free(info->screen_base, info->fix.smem_len); - info->screen_base = 0; + info->screen_base = NULL; info->fix.smem_start = 0; info->fix.smem_len = 0; } @@ -1158,7 +1158,7 @@ static int init_fbinfo(struct fb_info *info) return 0; } -static int install_fb(struct fb_info *info) +static int __devinit install_fb(struct fb_info *info) { int rc; struct mfb_info *mfbi = info->par; @@ -1233,7 +1233,7 @@ static int install_fb(struct fb_info *info) return 0; } -static void __exit uninstall_fb(struct fb_info *info) +static void uninstall_fb(struct fb_info *info) { struct mfb_info *mfbi = info->par; @@ -1287,7 +1287,7 @@ static int request_irq_local(int irq) /* Read to clear the status */ status = in_be32(&hw->int_status); - ret = request_irq(irq, fsl_diu_isr, 0, "diu", 0); + ret = request_irq(irq, fsl_diu_isr, 0, "diu", NULL); if (ret) pr_info("Request diu IRQ failed.\n"); else { @@ -1312,7 +1312,7 @@ static void free_irq_local(int irq) /* Disable all LCDC interrupt */ out_be32(&hw->int_mask, 0x1f); - free_irq(irq, 0); + free_irq(irq, NULL); } #ifdef CONFIG_PM @@ -1324,7 +1324,7 @@ static int fsl_diu_suspend(struct of_device *ofdev, pm_message_t state) { struct fsl_diu_data *machine_data; - machine_data = dev_get_drvdata(&ofdev->dev); + machine_data = dev_get_drvdata(&dev->dev); disable_lcdc(machine_data->fsl_diu_info[0]); return 0; @@ -1334,7 +1334,7 @@ static int fsl_diu_resume(struct of_device *ofdev) { struct fsl_diu_data *machine_data; - machine_data = dev_get_drvdata(&ofdev->dev); + machine_data = dev_get_drvdata(&dev->dev); enable_lcdc(machine_data->fsl_diu_info[0]); return 0; @@ -1353,7 +1353,8 @@ static int allocate_buf(struct diu_addr *buf, u32 size, u32 bytes_align) dma_addr_t paddr = 0; ssize = size + bytes_align; - buf->vaddr = dma_alloc_coherent(0, ssize, &paddr, GFP_DMA | __GFP_ZERO); + buf->vaddr = dma_alloc_coherent(NULL, ssize, &paddr, GFP_DMA | + __GFP_ZERO); if (!buf->vaddr) return -ENOMEM; @@ -1371,7 +1372,7 @@ static int allocate_buf(struct diu_addr *buf, u32 size, u32 bytes_align) static void free_buf(struct diu_addr *buf, u32 size, u32 bytes_align) { - dma_free_coherent(0, size + bytes_align, + dma_free_coherent(NULL, size + bytes_align, buf->vaddr, (buf->paddr - buf->offset)); return; } @@ -1411,7 +1412,7 @@ static ssize_t show_monitor(struct device *device, return diu_ops.show_monitor_port(machine_data->monitor_port, buf); } -static int fsl_diu_probe(struct of_device *ofdev, +static int __devinit fsl_diu_probe(struct of_device *ofdev, const struct of_device_id *match) { struct device_node *np = ofdev->node; diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index 7dcda187d9ba..fafe7db20d6d 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -1246,7 +1246,7 @@ static int pxafb_resume(struct platform_device *dev) * cache. Once this area is remapped, all virtual memory * access to the video memory should occur at the new region. */ -static int __init pxafb_map_video_memory(struct pxafb_info *fbi) +static int __devinit pxafb_map_video_memory(struct pxafb_info *fbi) { /* * We reserve one page for the palette, plus the size @@ -1348,7 +1348,7 @@ decode_mode: pxafb_decode_mode_info(fbi, inf->modes, inf->num_modes); } -static struct pxafb_info * __init pxafb_init_fbinfo(struct device *dev) +static struct pxafb_info * __devinit pxafb_init_fbinfo(struct device *dev) { struct pxafb_info *fbi; void *addr; @@ -1410,7 +1410,7 @@ static struct pxafb_info * __init pxafb_init_fbinfo(struct device *dev) } #ifdef CONFIG_FB_PXA_PARAMETERS -static int __init parse_opt_mode(struct device *dev, const char *this_opt) +static int __devinit parse_opt_mode(struct device *dev, const char *this_opt) { struct pxafb_mach_info *inf = dev->platform_data; @@ -1469,7 +1469,7 @@ done: return 0; } -static int __init parse_opt(struct device *dev, char *this_opt) +static int __devinit parse_opt(struct device *dev, char *this_opt) { struct pxafb_mach_info *inf = dev->platform_data; struct pxafb_mode_info *mode = &inf->modes[0]; @@ -1567,7 +1567,7 @@ static int __init parse_opt(struct device *dev, char *this_opt) return 0; } -static int __init pxafb_parse_options(struct device *dev, char *options) +static int __devinit pxafb_parse_options(struct device *dev, char *options) { char *this_opt; int ret; @@ -1588,8 +1588,8 @@ static int __init pxafb_parse_options(struct device *dev, char *options) static char g_options[256] __devinitdata = ""; -#ifndef CONFIG_MODULES -static int __devinit pxafb_setup_options(void) +#ifndef MODULE +static int __init pxafb_setup_options(void) { char *options = NULL; @@ -1613,7 +1613,7 @@ MODULE_PARM_DESC(options, "LCD parameters (see Documentation/fb/pxafb.txt)"); #define pxafb_setup_options() (0) #endif -static int __init pxafb_probe(struct platform_device *dev) +static int __devinit pxafb_probe(struct platform_device *dev) { struct pxafb_info *fbi; struct pxafb_mach_info *inf; @@ -1685,14 +1685,14 @@ static int __init pxafb_probe(struct platform_device *dev) if (r == NULL) { dev_err(&dev->dev, "no I/O memory resource defined\n"); ret = -ENODEV; - goto failed; + goto failed_fbi; } r = request_mem_region(r->start, r->end - r->start + 1, dev->name); if (r == NULL) { dev_err(&dev->dev, "failed to request I/O memory\n"); ret = -EBUSY; - goto failed; + goto failed_fbi; } fbi->mmio_base = ioremap(r->start, r->end - r->start + 1); @@ -1735,8 +1735,17 @@ static int __init pxafb_probe(struct platform_device *dev) * This makes sure that our colour bitfield * descriptors are correctly initialised. */ - pxafb_check_var(&fbi->fb.var, &fbi->fb); - pxafb_set_par(&fbi->fb); + ret = pxafb_check_var(&fbi->fb.var, &fbi->fb); + if (ret) { + dev_err(&dev->dev, "failed to get suitable mode\n"); + goto failed_free_irq; + } + + ret = pxafb_set_par(&fbi->fb); + if (ret) { + dev_err(&dev->dev, "Failed to set parameters\n"); + goto failed_free_irq; + } platform_set_drvdata(dev, fbi); @@ -1744,7 +1753,7 @@ static int __init pxafb_probe(struct platform_device *dev) if (ret < 0) { dev_err(&dev->dev, "Failed to register framebuffer device: %d\n", ret); - goto failed_free_irq; + goto failed_free_cmap; } #ifdef CONFIG_CPU_FREQ @@ -1763,18 +1772,23 @@ static int __init pxafb_probe(struct platform_device *dev) return 0; +failed_free_cmap: + if (fbi->fb.cmap.len) + fb_dealloc_cmap(&fbi->fb.cmap); failed_free_irq: free_irq(irq, fbi); -failed_free_res: - release_mem_region(r->start, r->end - r->start + 1); -failed_free_io: - iounmap(fbi->mmio_base); failed_free_mem: dma_free_writecombine(&dev->dev, fbi->map_size, fbi->map_cpu, fbi->map_dma); -failed: +failed_free_io: + iounmap(fbi->mmio_base); +failed_free_res: + release_mem_region(r->start, r->end - r->start + 1); +failed_fbi: + clk_put(fbi->clk); platform_set_drvdata(dev, NULL); kfree(fbi); +failed: return ret; } @@ -1787,7 +1801,7 @@ static struct platform_driver pxafb_driver = { }, }; -static int __devinit pxafb_init(void) +static int __init pxafb_init(void) { if (pxafb_setup_options()) return -EINVAL; diff --git a/drivers/video/w100fb.c b/drivers/video/w100fb.c index 30469bf906e5..d0674f1e3f10 100644 --- a/drivers/video/w100fb.c +++ b/drivers/video/w100fb.c @@ -1003,6 +1003,7 @@ static struct w100_pll_info xtal_14318000[] = { static struct w100_pll_info xtal_16000000[] = { /*freq M N_int N_fac tfgoal lock_time */ { 72, 1, 8, 0, 0xe0, 48}, /* tfgoal guessed */ + { 80, 1, 9, 0, 0xe0, 13}, /* tfgoal guessed */ { 95, 1, 10, 7, 0xe0, 38}, /* tfgoal guessed */ { 96, 1, 11, 0, 0xe0, 36}, /* tfgoal guessed */ { 0, 0, 0, 0, 0, 0}, diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 619a6f8d65a2..47ed39b52f9c 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -18,6 +18,7 @@ * frame buffer. */ +#include <linux/console.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fb.h> @@ -42,37 +43,68 @@ struct xenfb_info { struct xenfb_page *page; unsigned long *mfns; int update_wanted; /* XENFB_TYPE_UPDATE wanted */ + int feature_resize; /* XENFB_TYPE_RESIZE ok */ + struct xenfb_resize resize; /* protected by resize_lock */ + int resize_dpy; /* ditto */ + spinlock_t resize_lock; struct xenbus_device *xbdev; }; -static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; +#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8) +enum { KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT }; +static int video[KPARAM_CNT] = { 2, XENFB_WIDTH, XENFB_HEIGHT }; +module_param_array(video, int, NULL, 0); +MODULE_PARM_DESC(video, + "Video memory size in MB, width, height in pixels (default 2,800,600)"); + +static void xenfb_make_preferred_console(void); static int xenfb_remove(struct xenbus_device *); -static void xenfb_init_shared_page(struct xenfb_info *); +static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); static void xenfb_disconnect_backend(struct xenfb_info *); +static void xenfb_send_event(struct xenfb_info *info, + union xenfb_out_event *event) +{ + u32 prod; + + prod = info->page->out_prod; + /* caller ensures !xenfb_queue_full() */ + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(info->page, prod) = *event; + wmb(); /* ensure ring contents visible */ + info->page->out_prod = prod + 1; + + notify_remote_via_irq(info->irq); +} + static void xenfb_do_update(struct xenfb_info *info, int x, int y, int w, int h) { union xenfb_out_event event; - u32 prod; + memset(&event, 0, sizeof(event)); event.type = XENFB_TYPE_UPDATE; event.update.x = x; event.update.y = y; event.update.width = w; event.update.height = h; - prod = info->page->out_prod; /* caller ensures !xenfb_queue_full() */ - mb(); /* ensure ring space available */ - XENFB_OUT_RING_REF(info->page, prod) = event; - wmb(); /* ensure ring contents visible */ - info->page->out_prod = prod + 1; + xenfb_send_event(info, &event); +} - notify_remote_via_irq(info->irq); +static void xenfb_do_resize(struct xenfb_info *info) +{ + union xenfb_out_event event; + + memset(&event, 0, sizeof(event)); + event.resize = info->resize; + + /* caller ensures !xenfb_queue_full() */ + xenfb_send_event(info, &event); } static int xenfb_queue_full(struct xenfb_info *info) @@ -84,12 +116,28 @@ static int xenfb_queue_full(struct xenfb_info *info) return prod - cons == XENFB_OUT_RING_LEN; } +static void xenfb_handle_resize_dpy(struct xenfb_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&info->resize_lock, flags); + if (info->resize_dpy) { + if (!xenfb_queue_full(info)) { + info->resize_dpy = 0; + xenfb_do_resize(info); + } + } + spin_unlock_irqrestore(&info->resize_lock, flags); +} + static void xenfb_refresh(struct xenfb_info *info, int x1, int y1, int w, int h) { unsigned long flags; - int y2 = y1 + h - 1; int x2 = x1 + w - 1; + int y2 = y1 + h - 1; + + xenfb_handle_resize_dpy(info); if (!info->update_wanted) return; @@ -222,6 +270,57 @@ static ssize_t xenfb_write(struct fb_info *p, const char __user *buf, return res; } +static int +xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + int required_mem_len; + + xenfb_info = info->par; + + if (!xenfb_info->feature_resize) { + if (var->xres == video[KPARAM_WIDTH] && + var->yres == video[KPARAM_HEIGHT] && + var->bits_per_pixel == xenfb_info->page->depth) { + return 0; + } + return -EINVAL; + } + + /* Can't resize past initial width and height */ + if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT]) + return -EINVAL; + + required_mem_len = var->xres * var->yres * xenfb_info->page->depth / 8; + if (var->bits_per_pixel == xenfb_info->page->depth && + var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) && + required_mem_len <= info->fix.smem_len) { + var->xres_virtual = var->xres; + var->yres_virtual = var->yres; + return 0; + } + return -EINVAL; +} + +static int xenfb_set_par(struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + unsigned long flags; + + xenfb_info = info->par; + + spin_lock_irqsave(&xenfb_info->resize_lock, flags); + xenfb_info->resize.type = XENFB_TYPE_RESIZE; + xenfb_info->resize.width = info->var.xres; + xenfb_info->resize.height = info->var.yres; + xenfb_info->resize.stride = info->fix.line_length; + xenfb_info->resize.depth = info->var.bits_per_pixel; + xenfb_info->resize.offset = 0; + xenfb_info->resize_dpy = 1; + spin_unlock_irqrestore(&xenfb_info->resize_lock, flags); + return 0; +} + static struct fb_ops xenfb_fb_ops = { .owner = THIS_MODULE, .fb_read = fb_sys_read, @@ -230,6 +329,8 @@ static struct fb_ops xenfb_fb_ops = { .fb_fillrect = xenfb_fillrect, .fb_copyarea = xenfb_copyarea, .fb_imageblit = xenfb_imageblit, + .fb_check_var = xenfb_check_var, + .fb_set_par = xenfb_set_par, }; static irqreturn_t xenfb_event_handler(int rq, void *dev_id) @@ -258,6 +359,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, { struct xenfb_info *info; struct fb_info *fb_info; + int fb_size; + int val; int ret; info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -265,18 +368,35 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); return -ENOMEM; } + + /* Limit kernel param videoram amount to what is in xenstore */ + if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) { + if (val < video[KPARAM_MEM]) + video[KPARAM_MEM] = val; + } + + /* If requested res does not fit in available memory, use default */ + fb_size = video[KPARAM_MEM] * 1024 * 1024; + if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8 + > fb_size) { + video[KPARAM_WIDTH] = XENFB_WIDTH; + video[KPARAM_HEIGHT] = XENFB_HEIGHT; + fb_size = XENFB_DEFAULT_FB_LEN; + } + dev->dev.driver_data = info; info->xbdev = dev; info->irq = -1; info->x1 = info->y1 = INT_MAX; spin_lock_init(&info->dirty_lock); + spin_lock_init(&info->resize_lock); - info->fb = vmalloc(xenfb_mem_len); + info->fb = vmalloc(fb_size); if (info->fb == NULL) goto error_nomem; - memset(info->fb, 0, xenfb_mem_len); + memset(info->fb, 0, fb_size); - info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT; info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); if (!info->mfns) @@ -287,8 +407,6 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, if (!info->page) goto error_nomem; - xenfb_init_shared_page(info); - /* abusing framebuffer_alloc() to allocate pseudo_palette */ fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); if (fb_info == NULL) @@ -301,9 +419,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->screen_base = info->fb; fb_info->fbops = &xenfb_fb_ops; - fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; - fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; - fb_info->var.bits_per_pixel = info->page->depth; + fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH]; + fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT]; + fb_info->var.bits_per_pixel = XENFB_DEPTH; fb_info->var.red = (struct fb_bitfield){16, 8, 0}; fb_info->var.green = (struct fb_bitfield){8, 8, 0}; @@ -315,9 +433,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->var.vmode = FB_VMODE_NONINTERLACED; fb_info->fix.visual = FB_VISUAL_TRUECOLOR; - fb_info->fix.line_length = info->page->line_length; + fb_info->fix.line_length = fb_info->var.xres * XENFB_DEPTH / 8; fb_info->fix.smem_start = 0; - fb_info->fix.smem_len = xenfb_mem_len; + fb_info->fix.smem_len = fb_size; strcpy(fb_info->fix.id, "xen"); fb_info->fix.type = FB_TYPE_PACKED_PIXELS; fb_info->fix.accel = FB_ACCEL_NONE; @@ -334,6 +452,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->fbdefio = &xenfb_defio; fb_deferred_io_init(fb_info); + xenfb_init_shared_page(info, fb_info); + ret = register_framebuffer(fb_info); if (ret) { fb_deferred_io_cleanup(fb_info); @@ -348,6 +468,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, if (ret < 0) goto error; + xenfb_make_preferred_console(); return 0; error_nomem: @@ -358,12 +479,34 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, return ret; } +static __devinit void +xenfb_make_preferred_console(void) +{ + struct console *c; + + if (console_set_on_cmdline) + return; + + acquire_console_sem(); + for (c = console_drivers; c; c = c->next) { + if (!strcmp(c->name, "tty") && c->index == 0) + break; + } + release_console_sem(); + if (c) { + unregister_console(c); + c->flags |= CON_CONSDEV; + c->flags &= ~CON_PRINTBUFFER; /* don't print again */ + register_console(c); + } +} + static int xenfb_resume(struct xenbus_device *dev) { struct xenfb_info *info = dev->dev.driver_data; xenfb_disconnect_backend(info); - xenfb_init_shared_page(info); + xenfb_init_shared_page(info, info->fb_info); return xenfb_connect_backend(dev, info); } @@ -391,20 +534,23 @@ static unsigned long vmalloc_to_mfn(void *address) return pfn_to_mfn(vmalloc_to_pfn(address)); } -static void xenfb_init_shared_page(struct xenfb_info *info) +static void xenfb_init_shared_page(struct xenfb_info *info, + struct fb_info *fb_info) { int i; + int epd = PAGE_SIZE / sizeof(info->mfns[0]); for (i = 0; i < info->nr_pages; i++) info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); - info->page->pd[0] = vmalloc_to_mfn(info->mfns); - info->page->pd[1] = 0; - info->page->width = XENFB_WIDTH; - info->page->height = XENFB_HEIGHT; - info->page->depth = XENFB_DEPTH; - info->page->line_length = (info->page->depth / 8) * info->page->width; - info->page->mem_length = xenfb_mem_len; + for (i = 0; i * epd < info->nr_pages; i++) + info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]); + + info->page->width = fb_info->var.xres; + info->page->height = fb_info->var.yres; + info->page->depth = fb_info->var.bits_per_pixel; + info->page->line_length = fb_info->fix.line_length; + info->page->mem_length = fb_info->fix.smem_len; info->page->in_cons = info->page->in_prod = 0; info->page->out_cons = info->page->out_prod = 0; } @@ -504,6 +650,11 @@ InitWait: val = 0; if (val) info->update_wanted = 1; + + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-resize", "%d", &val) < 0) + val = 0; + info->feature_resize = val; break; case XenbusStateClosing: @@ -547,4 +698,6 @@ static void __exit xenfb_cleanup(void) module_init(xenfb_init); module_exit(xenfb_cleanup); +MODULE_DESCRIPTION("Xen virtual framebuffer device frontend"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:vfb"); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 37af04f1ffd9..363286c54290 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,4 +1,4 @@ -obj-y += grant-table.o features.o events.o +obj-y += grant-table.o features.o events.o manage.o obj-y += xenbus/ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ab25ba6cbbb9..591bc29b55f5 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -225,7 +225,7 @@ static int increase_reservation(unsigned long nr_pages) page = balloon_next_page(page); } - reservation.extent_start = (unsigned long)frame_list; + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); @@ -321,7 +321,7 @@ static int decrease_reservation(unsigned long nr_pages) balloon_append(pfn_to_page(pfn)); } - reservation.extent_start = (unsigned long)frame_list; + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); BUG_ON(ret != nr_pages); @@ -368,7 +368,7 @@ static void balloon_process(struct work_struct *work) } /* Resets the Xen limit, sets new target, and kicks off processing. */ -void balloon_set_new_target(unsigned long target) +static void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ balloon_stats.hard_limit = ~0UL; @@ -483,7 +483,7 @@ static int dealloc_pte_fn( .extent_order = 0, .domid = DOMID_SELF }; - reservation.extent_start = (unsigned long)&mfn; + set_xen_guest_handle(reservation.extent_start, &mfn); set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); @@ -519,7 +519,7 @@ static struct page **alloc_empty_pages_and_pagevec(int nr_pages) .extent_order = 0, .domid = DOMID_SELF }; - reservation.extent_start = (unsigned long)&gmfn; + set_xen_guest_handle(reservation.extent_start, &gmfn); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); if (ret == 1) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 76e5b7386af9..332dd63750a0 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -355,7 +355,7 @@ static void unbind_from_irq(unsigned int irq) spin_lock(&irq_mapping_update_lock); - if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { + if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { close.port = evtchn; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) BUG(); @@ -375,7 +375,7 @@ static void unbind_from_irq(unsigned int irq) evtchn_to_irq[evtchn] = -1; irq_info[irq] = IRQ_UNBOUND; - dynamic_irq_init(irq); + dynamic_irq_cleanup(irq); } spin_unlock(&irq_mapping_update_lock); @@ -557,6 +557,33 @@ out: put_cpu(); } +/* Rebind a new event channel to an existing irq. */ +void rebind_evtchn_irq(int evtchn, int irq) +{ + /* Make sure the irq is masked, since the new event channel + will also be masked. */ + disable_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + /* After resume the irq<->evtchn mappings are all cleared out */ + BUG_ON(evtchn_to_irq[evtchn] != -1); + /* Expect irq to have been bound before, + so the bindcount should be non-0 */ + BUG_ON(irq_bindcount[irq] == 0); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + + spin_unlock(&irq_mapping_update_lock); + + /* new event channels are always bound to cpu 0 */ + irq_set_affinity(irq, cpumask_of_cpu(0)); + + /* Unmask the event channel. */ + enable_irq(irq); +} + /* Rebind an evtchn so that it gets delivered to a specific cpu */ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { @@ -647,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq) return ret; } +static void restore_cpu_virqs(unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int virq, irq, evtchn; + + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_VIRQ); + BUG_ON(irq_info[irq].index != virq); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void restore_cpu_ipis(unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int ipi, irq, evtchn; + + for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_IPI); + BUG_ON(irq_info[irq].index != ipi); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + + } +} + +void xen_irq_resume(void) +{ + unsigned int cpu, irq, evtchn; + + init_evtchn_cpu_bindings(); + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* No IRQ <-> event-channel mappings. */ + for (irq = 0; irq < NR_IRQS; irq++) + irq_info[irq].evtchn = 0; /* zap event-channel binding */ + + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + for_each_possible_cpu(cpu) { + restore_cpu_virqs(cpu); + restore_cpu_ipis(cpu); + } +} + static struct irq_chip xen_dynamic_chip __read_mostly = { .name = "xen-dyn", .mask = disable_dynirq, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 52b6b41b909d..e9e11168616a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return 0; } -static int gnttab_resume(void) +int gnttab_resume(void) { if (max_nr_grant_frames() < nr_grant_frames) return -ENOSYS; return gnttab_map(0, nr_grant_frames - 1); } -static int gnttab_suspend(void) +int gnttab_suspend(void) { arch_gnttab_unmap_shared(shared, nr_grant_frames); return 0; diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c new file mode 100644 index 000000000000..5b546e365f00 --- /dev/null +++ b/drivers/xen/manage.c @@ -0,0 +1,252 @@ +/* + * Handle extern requests for shutdown, reboot and sysrq + */ +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/reboot.h> +#include <linux/sysrq.h> +#include <linux/stop_machine.h> +#include <linux/freezer.h> + +#include <xen/xenbus.h> +#include <xen/grant_table.h> +#include <xen/events.h> +#include <xen/hvc-console.h> +#include <xen/xen-ops.h> + +#include <asm/xen/hypercall.h> +#include <asm/xen/page.h> + +enum shutdown_state { + SHUTDOWN_INVALID = -1, + SHUTDOWN_POWEROFF = 0, + SHUTDOWN_SUSPEND = 2, + /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + report a crash, not be instructed to crash! + HALT is the same as POWEROFF, as far as we're concerned. The tools use + the distinction when we return the reason code to them. */ + SHUTDOWN_HALT = 4, +}; + +/* Ignore multiple shutdown requests. */ +static enum shutdown_state shutting_down = SHUTDOWN_INVALID; + +#ifdef CONFIG_PM_SLEEP +static int xen_suspend(void *data) +{ + int *cancelled = data; + int err; + + BUG_ON(!irqs_disabled()); + + load_cr3(swapper_pg_dir); + + err = device_power_down(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n", + err); + return err; + } + + xen_mm_pin_all(); + gnttab_suspend(); + xen_pre_suspend(); + + /* + * This hypercall returns 1 if suspend was cancelled + * or the domain was merely checkpointed, and 0 if it + * is resuming in a new domain. + */ + *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + + xen_post_suspend(*cancelled); + gnttab_resume(); + xen_mm_unpin_all(); + + device_power_up(); + + if (!*cancelled) { + xen_irq_resume(); + xen_console_resume(); + } + + return 0; +} + +static void do_suspend(void) +{ + int err; + int cancelled = 1; + + shutting_down = SHUTDOWN_SUSPEND; + +#ifdef CONFIG_PREEMPT + /* If the kernel is preemptible, we need to freeze all the processes + to prevent them from being in the middle of a pagetable update + during suspend. */ + err = freeze_processes(); + if (err) { + printk(KERN_ERR "xen suspend: freeze failed %d\n", err); + return; + } +#endif + + err = device_suspend(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen suspend: device_suspend %d\n", err); + goto out; + } + + printk("suspending xenbus...\n"); + /* XXX use normal device tree? */ + xenbus_suspend(); + + err = stop_machine_run(xen_suspend, &cancelled, 0); + if (err) { + printk(KERN_ERR "failed to start xen_suspend: %d\n", err); + goto out; + } + + if (!cancelled) + xenbus_resume(); + else + xenbus_suspend_cancel(); + + device_resume(); + + /* Make sure timer events get retriggered on all CPUs */ + clock_was_set(); +out: +#ifdef CONFIG_PREEMPT + thaw_processes(); +#endif + shutting_down = SHUTDOWN_INVALID; +} +#endif /* CONFIG_PM_SLEEP */ + +static void shutdown_handler(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + char *str; + struct xenbus_transaction xbt; + int err; + + if (shutting_down != SHUTDOWN_INVALID) + return; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); + /* Ignore read errors and empty reads. */ + if (XENBUS_IS_ERR_READ(str)) { + xenbus_transaction_end(xbt, 1); + return; + } + + xenbus_write(xbt, "control", "shutdown", ""); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) { + kfree(str); + goto again; + } + + if (strcmp(str, "poweroff") == 0 || + strcmp(str, "halt") == 0) { + shutting_down = SHUTDOWN_POWEROFF; + orderly_poweroff(false); + } else if (strcmp(str, "reboot") == 0) { + shutting_down = SHUTDOWN_POWEROFF; /* ? */ + ctrl_alt_del(); +#ifdef CONFIG_PM_SLEEP + } else if (strcmp(str, "suspend") == 0) { + do_suspend(); +#endif + } else { + printk(KERN_INFO "Ignoring shutdown request: %s\n", str); + shutting_down = SHUTDOWN_INVALID; + } + + kfree(str); +} + +static void sysrq_handler(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + char sysrq_key = '\0'; + struct xenbus_transaction xbt; + int err; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { + printk(KERN_ERR "Unable to read sysrq code in " + "control/sysrq\n"); + xenbus_transaction_end(xbt, 1); + return; + } + + if (sysrq_key != '\0') + xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + + if (sysrq_key != '\0') + handle_sysrq(sysrq_key, NULL); +} + +static struct xenbus_watch shutdown_watch = { + .node = "control/shutdown", + .callback = shutdown_handler +}; + +static struct xenbus_watch sysrq_watch = { + .node = "control/sysrq", + .callback = sysrq_handler +}; + +static int setup_shutdown_watcher(void) +{ + int err; + + err = register_xenbus_watch(&shutdown_watch); + if (err) { + printk(KERN_ERR "Failed to set shutdown watcher\n"); + return err; + } + + err = register_xenbus_watch(&sysrq_watch); + if (err) { + printk(KERN_ERR "Failed to set sysrq watcher\n"); + return err; + } + + return 0; +} + +static int shutdown_event(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + setup_shutdown_watcher(); + return NOTIFY_DONE; +} + +static int __init setup_shutdown_event(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = shutdown_event + }; + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(setup_shutdown_event); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 6efbe3f29ca5..090c61ee8fd0 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -203,7 +203,6 @@ int xb_read(void *data, unsigned len) int xb_init_comms(void) { struct xenstore_domain_interface *intf = xen_store_interface; - int err; if (intf->req_prod != intf->req_cons) printk(KERN_ERR "XENBUS request ring is not quiescent " @@ -216,18 +215,20 @@ int xb_init_comms(void) intf->rsp_cons = intf->rsp_prod; } - if (xenbus_irq) - unbind_from_irqhandler(xenbus_irq, &xb_waitq); + if (xenbus_irq) { + /* Already have an irq; assume we're resuming */ + rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); + } else { + int err; + err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err <= 0) { + printk(KERN_ERR "XENBUS request irq failed %i\n", err); + return err; + } - err = bind_evtchn_to_irqhandler( - xen_store_evtchn, wake_waiting, - 0, "xenbus", &xb_waitq); - if (err <= 0) { - printk(KERN_ERR "XENBUS request irq failed %i\n", err); - return err; + xenbus_irq = err; } - xenbus_irq = err; - return 0; } diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index fd01d90cada5..57997fa14e69 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -51,4 +51,4 @@ int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); void v9fs_dentry_release(struct dentry *); -int v9fs_uflags2omode(int uflags); +int v9fs_uflags2omode(int uflags, int extended); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 0d55affe37d4..52944d2249a4 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -59,7 +59,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); v9ses = v9fs_inode2v9ses(inode); - omode = v9fs_uflags2omode(file->f_flags); + omode = v9fs_uflags2omode(file->f_flags, v9fs_extended(v9ses)); fid = file->private_data; if (!fid) { fid = v9fs_fid_clone(file->f_path.dentry); @@ -75,6 +75,8 @@ int v9fs_file_open(struct inode *inode, struct file *file) inode->i_size = 0; inode->i_blocks = 0; } + if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses))) + generic_file_llseek(file, 0, SEEK_END); } file->private_data = fid; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 40fa807bd929..c95295c65045 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -132,10 +132,10 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) /** * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits * @uflags: flags to convert - * + * @extended: if .u extensions are active */ -int v9fs_uflags2omode(int uflags) +int v9fs_uflags2omode(int uflags, int extended) { int ret; @@ -155,14 +155,16 @@ int v9fs_uflags2omode(int uflags) break; } - if (uflags & O_EXCL) - ret |= P9_OEXCL; - if (uflags & O_TRUNC) ret |= P9_OTRUNC; - if (uflags & O_APPEND) - ret |= P9_OAPPEND; + if (extended) { + if (uflags & O_EXCL) + ret |= P9_OEXCL; + + if (uflags & O_APPEND) + ret |= P9_OAPPEND; + } return ret; } @@ -506,7 +508,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, flags = O_RDWR; fid = v9fs_create(v9ses, dir, dentry, NULL, perm, - v9fs_uflags2omode(flags)); + v9fs_uflags2omode(flags, v9fs_extended(v9ses))); if (IS_ERR(fid)) { err = PTR_ERR(fid); fid = NULL; diff --git a/fs/Kconfig b/fs/Kconfig index cf12c403b8c7..313b2e06ded5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -830,7 +830,7 @@ config NTFS_FS from the project web site. For more information see <file:Documentation/filesystems/ntfs.txt> - and <http://linux-ntfs.sourceforge.net/>. + and <http://www.linux-ntfs.org/>. To compile this file system support as a module, choose M here: the module will be called ntfs. @@ -930,7 +930,7 @@ config PROC_KCORE config PROC_VMCORE bool "/proc/vmcore support (EXPERIMENTAL)" - depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP + depends on PROC_FS && CRASH_DUMP default y help Exports the dump image of crashed kernel in ELF format. diff --git a/fs/block_dev.c b/fs/block_dev.c index 470c10ceb0fb..10d8a0aa871a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -931,8 +931,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) struct gendisk *disk; int ret; int part; + int perm = 0; - ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); + if (file->f_mode & FMODE_READ) + perm |= MAY_READ; + if (file->f_mode & FMODE_WRITE) + perm |= MAY_WRITE; + /* + * hooks: /n/, see "layering violations". + */ + ret = devcgroup_inode_permission(bdev->bd_inode, perm); if (ret != 0) return ret; diff --git a/fs/buffer.c b/fs/buffer.c index a073f3f4f013..0f51c0f7c266 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -821,7 +821,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * contents - it is a noop if I/O is still in * flight on potentially older contents. */ - ll_rw_block(SWRITE, 1, &bh); + ll_rw_block(SWRITE_SYNC, 1, &bh); brelse(bh); spin_lock(lock); } @@ -2940,16 +2940,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE) + if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); else if (test_set_buffer_locked(bh)) continue; - if (rw == WRITE || rw == SWRITE) { + if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(WRITE, bh); + if (rw == SWRITE_SYNC) + submit_bh(WRITE_SYNC, bh); + else + submit_bh(WRITE, bh); continue; } } else { @@ -2978,7 +2981,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); diff --git a/fs/dcache.c b/fs/dcache.c index 3ee588d5f585..6068c25b393c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -17,6 +17,7 @@ #include <linux/syscalls.h> #include <linux/string.h> #include <linux/mm.h> +#include <linux/fdtable.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/slab.h> @@ -106,9 +107,10 @@ static void dentry_lru_remove(struct dentry *dentry) /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. - * Called with dcache_lock and per dentry lock held, drops both. */ static void dentry_iput(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { @@ -132,12 +134,13 @@ static void dentry_iput(struct dentry * dentry) * d_kill - kill dentry and return parent * @dentry: dentry to kill * - * Called with dcache_lock and d_lock, releases both. The dentry must - * already be unhashed and removed from the LRU. + * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. */ static struct dentry *d_kill(struct dentry *dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct dentry *parent; @@ -383,11 +386,11 @@ restart: * Try to prune ancestors as well. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also expected * to be beneficial in reducing dentry cache fragmentation. - * - * Called with dcache_lock, drops it and then regains. - * Called with dentry->d_lock held, drops it. */ static void prune_one_dentry(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) + __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); @@ -1604,10 +1607,9 @@ static int d_isparent(struct dentry *p1, struct dentry *p2) * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... - * - * On return, dcache_lock will have been unlocked. */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) + __releases(dcache_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -1743,11 +1745,9 @@ out_nolock: shouldnt_be_hashed: spin_unlock(&dcache_lock); BUG(); - goto shouldnt_be_hashed; } -static int prepend(char **buffer, int *buflen, const char *str, - int namelen) +static int prepend(char **buffer, int *buflen, const char *str, int namelen) { *buflen -= namelen; if (*buflen < 0) @@ -1757,8 +1757,13 @@ static int prepend(char **buffer, int *buflen, const char *str, return 0; } +static int prepend_name(char **buffer, int *buflen, struct qstr *name) +{ + return prepend(buffer, buflen, name->name, name->len); +} + /** - * d_path - return the path of a dentry + * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry (may be modified by this function) * @buffer: buffer to return value in @@ -1779,9 +1784,10 @@ char *__d_path(const struct path *path, struct path *root, { struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; - char * end = buffer+buflen; - char * retval; + char *end = buffer + buflen; + char *retval; + spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); if (!IS_ROOT(dentry) && d_unhashed(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) @@ -1800,38 +1806,37 @@ char *__d_path(const struct path *path, struct path *root, break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { /* Global root? */ - spin_lock(&vfsmount_lock); if (vfsmnt->mnt_parent == vfsmnt) { - spin_unlock(&vfsmount_lock); goto global_root; } dentry = vfsmnt->mnt_mountpoint; vfsmnt = vfsmnt->mnt_parent; - spin_unlock(&vfsmount_lock); continue; } parent = dentry->d_parent; prefetch(parent); - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; retval = end; dentry = parent; } +out: + spin_unlock(&vfsmount_lock); return retval; global_root: retval += 1; /* hit the slash */ - if (prepend(&retval, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) + if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) goto Elong; root->mnt = vfsmnt; root->dentry = dentry; - return retval; + goto out; + Elong: - return ERR_PTR(-ENAMETOOLONG); + retval = ERR_PTR(-ENAMETOOLONG); + goto out; } /** @@ -1845,9 +1850,9 @@ Elong: * * Returns the buffer or an error code if the path was too long. * - * "buflen" should be positive. Caller holds the dcache_lock. + * "buflen" should be positive. */ -char *d_path(struct path *path, char *buf, int buflen) +char *d_path(const struct path *path, char *buf, int buflen) { char *res; struct path root; @@ -1915,16 +1920,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) retval = end-1; *retval = '/'; - for (;;) { - struct dentry *parent; - if (IS_ROOT(dentry)) - break; + while (!IS_ROOT(dentry)) { + struct dentry *parent = dentry->d_parent; - parent = dentry->d_parent; prefetch(parent); - - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; @@ -1975,7 +1975,7 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) error = -ENOENT; /* Has the current directory has been unlinked? */ spin_lock(&dcache_lock); - if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { + if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 50c994a249a5..09a4522f65e6 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -575,13 +575,11 @@ int ecryptfs_init_ecryptfs_miscdev(void) int rc; atomic_set(&ecryptfs_num_miscdev_opens, 0); - mutex_lock(&ecryptfs_daemon_hash_mux); rc = misc_register(&ecryptfs_miscdev); if (rc) printk(KERN_ERR "%s: Failed to register miscellaneous device " "for communications with userspace daemons; rc = [%d]\n", __func__, rc); - mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index fe3119a71ada..2845425077e8 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2875,8 +2875,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off+len-towrite) { i_size_write(inode, off+len-towrite); EXT3_I(inode)->i_disksize = inode->i_size; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cb96f127c366..02bf24343979 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3337,8 +3337,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off+len-towrite) { i_size_write(inode, off+len-towrite); EXT4_I(inode)->i_disksize = inode->i_size; diff --git a/fs/libfs.c b/fs/libfs.c index 892d41cb3382..baeb71ee1cde 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -512,6 +512,20 @@ void simple_release_fs(struct vfsmount **mount, int *count) mntput(mnt); } +/** + * simple_read_from_buffer - copy data from the buffer to user space + * @to: the user space buffer to read to + * @count: the maximum number of bytes to read + * @ppos: the current position in the buffer + * @from: the buffer to read from + * @available: the size of the buffer + * + * The simple_read_from_buffer() function reads up to @count bytes from the + * buffer @from at offset @ppos into the user space address starting at @to. + * + * On success, the number of bytes read is returned and the offset @ppos is + * advanced by this number, or negative value is returned on error. + **/ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available) { @@ -528,6 +542,20 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, return count; } +/** + * memory_read_from_buffer - copy data from the buffer + * @to: the kernel space buffer to read to + * @count: the maximum number of bytes to read + * @ppos: the current position in the buffer + * @from: the buffer to read from + * @available: the size of the buffer + * + * The memory_read_from_buffer() function reads up to @count bytes from the + * buffer @from at offset @ppos into the kernel space address starting at @to. + * + * On success, the number of bytes read is returned and the offset @ppos is + * advanced by this number, or negative value is returned on error. + **/ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available) { diff --git a/fs/locks.c b/fs/locks.c index 11dbf08651b7..dce8c747371c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) /* insert into file's list */ fl->fl_next = *pos; *pos = fl; - - if (fl->fl_ops && fl->fl_ops->fl_insert) - fl->fl_ops->fl_insert(fl); } /* @@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p) fl->fl_fasync = NULL; } - if (fl->fl_ops && fl->fl_ops->fl_remove) - fl->fl_ops->fl_remove(fl); - if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; diff --git a/fs/namei.c b/fs/namei.c index c7e43536c49a..01e67dddcc3d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -581,15 +581,13 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd int result; /* make sure the stuff we saved doesn't go away */ - dget(save.dentry); - mntget(save.mnt); + path_get(&save); result = __link_path_walk(name, nd); if (result == -ESTALE) { /* nd->path had been dropped */ nd->path = save; - dget(nd->path.dentry); - mntget(nd->path.mnt); + path_get(&nd->path); nd->flags |= LOOKUP_REVAL; result = __link_path_walk(name, nd); } @@ -1216,8 +1214,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->flags = flags; nd->depth = 0; - nd->path.mnt = mntget(mnt); - nd->path.dentry = dget(dentry); + nd->path.dentry = dentry; + nd->path.mnt = mnt; + path_get(&nd->path); retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && @@ -2857,16 +2856,17 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct nameidata nd; void *cookie; + int res; nd.depth = 0; cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); - if (!IS_ERR(cookie)) { - int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); - if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd, cookie); - cookie = ERR_PTR(res); - } - return PTR_ERR(cookie); + if (IS_ERR(cookie)) + return PTR_ERR(cookie); + + res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + if (dentry->d_inode->i_op->put_link) + dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + return res; } int vfs_follow_link(struct nameidata *nd, const char *link) diff --git a/fs/open.c b/fs/open.c index a1450086e92f..a99ad09c3197 100644 --- a/fs/open.c +++ b/fs/open.c @@ -16,6 +16,7 @@ #include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> +#include <linux/securebits.h> #include <linux/security.h> #include <linux/mount.h> #include <linux/vfs.h> @@ -425,7 +426,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { struct nameidata nd; int old_fsuid, old_fsgid; - kernel_cap_t old_cap; + kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ int res; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ @@ -433,23 +434,27 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) old_fsuid = current->fsuid; old_fsgid = current->fsgid; - old_cap = current->cap_effective; current->fsuid = current->uid; current->fsgid = current->gid; - /* - * Clear the capabilities if we switch to a non-root user - * - * FIXME: There is a race here against sys_capset. The - * capabilities can change yet we will restore the old - * value below. We should hold task_capabilities_lock, - * but we cannot because user_path_walk can sleep. - */ - if (current->uid) - cap_clear(current->cap_effective); - else - current->cap_effective = current->cap_permitted; + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + /* + * Clear the capabilities if we switch to a non-root user + */ +#ifndef CONFIG_SECURITY_FILE_CAPABILITIES + /* + * FIXME: There is a race here against sys_capset. The + * capabilities can change yet we will restore the old + * value below. We should hold task_capabilities_lock, + * but we cannot because user_path_walk can sleep. + */ +#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ + if (current->uid) + old_cap = cap_set_effective(__cap_empty_set); + else + old_cap = cap_set_effective(current->cap_permitted); + } res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); if (res) @@ -478,7 +483,9 @@ out_path_release: out: current->fsuid = old_fsuid; current->fsgid = old_fsgid; - current->cap_effective = old_cap; + + if (!issecure(SECURE_NO_SETUID_FIXUP)) + cap_set_effective(old_cap); return res; } diff --git a/fs/pipe.c b/fs/pipe.c index ec228bc9f882..700f4e0d9572 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1003,8 +1003,7 @@ struct file *create_write_pipe(void) void free_write_pipe(struct file *f) { free_pipe_info(f->f_dentry->d_inode); - dput(f->f_path.dentry); - mntput(f->f_path.mnt); + path_put(&f->f_path); put_filp(f); } @@ -1015,8 +1014,8 @@ struct file *create_read_pipe(struct file *wrf) return ERR_PTR(-ENFILE); /* Grab pipe from the writer */ - f->f_path.mnt = mntget(wrf->f_path.mnt); - f->f_path.dentry = dget(wrf->f_path.dentry); + f->f_path = wrf->f_path; + path_get(&wrf->f_path); f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; f->f_pos = 0; @@ -1068,8 +1067,7 @@ int do_pipe(int *fd) err_fdr: put_unused_fd(fdr); err_read_pipe: - dput(fr->f_dentry); - mntput(fr->f_vfsmnt); + path_put(&fr->f_path); put_filp(fr); err_write_pipe: free_write_pipe(fw); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 7e277f2ad466..c652d469dc08 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } +int __attribute__((weak)) arch_report_meminfo(char *page) +{ + return 0; +} + static int meminfo_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -221,6 +226,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, len += hugetlb_report_meminfo(page + len); + len += arch_report_meminfo(page + len); + return proc_calc_metrics(page, start, off, count, eof, len); #undef K } @@ -472,6 +479,13 @@ static const struct file_operations proc_vmalloc_operations = { }; #endif +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif +#ifndef arch_irq_stat +#define arch_irq_stat() 0 +#endif + static int show_stat(struct seq_file *p, void *v) { int i; @@ -509,7 +523,9 @@ static int show_stat(struct seq_file *p, void *v) sum += temp; per_irq_sum[j] += temp; } + sum += arch_irq_stat_cpu(i); } + sum += arch_irq_stat(); seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)cputime64_to_clock_t(user), diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ab8ccc9d14ff..c492449f3b45 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -476,10 +476,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return -ESRCH; mm = get_task_mm(task); if (mm) { - static struct mm_walk clear_refs_walk; - memset(&clear_refs_walk, 0, sizeof(clear_refs_walk)); - clear_refs_walk.pmd_entry = clear_refs_pte_range; - clear_refs_walk.mm = mm; + struct mm_walk clear_refs_walk = { + .pmd_entry = clear_refs_pte_range, + .mm = mm, + }; down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; @@ -602,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return err; } -static struct mm_walk pagemap_walk = { - .pmd_entry = pagemap_pte_range, - .pte_hole = pagemap_pte_hole -}; - /* * /proc/pid/pagemap - an array mapping virtual pages to pfns * @@ -641,6 +636,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, struct pagemapread pm; int pagecount; int ret = -ESRCH; + struct mm_walk pagemap_walk; + unsigned long src; + unsigned long svpfn; + unsigned long start_vaddr; + unsigned long end_vaddr; if (!task) goto out; @@ -659,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!mm) goto out_task; - ret = -ENOMEM; + uaddr = (unsigned long)buf & PAGE_MASK; uend = (unsigned long)(buf + count); pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; - pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); + ret = 0; + if (pagecount == 0) + goto out_mm; + pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + ret = -ENOMEM; if (!pages) goto out_mm; @@ -684,33 +688,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, pm.out = (u64 *)buf; pm.end = (u64 *)(buf + count); - if (!ptrace_may_attach(task)) { - ret = -EIO; - } else { - unsigned long src = *ppos; - unsigned long svpfn = src / PM_ENTRY_BYTES; - unsigned long start_vaddr = svpfn << PAGE_SHIFT; - unsigned long end_vaddr = TASK_SIZE_OF(task); - - /* watch out for wraparound */ - if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) - start_vaddr = end_vaddr; - - /* - * The odds are that this will stop walking way - * before end_vaddr, because the length of the - * user buffer is tracked in "pm", and the walk - * will stop when we hit the end of the buffer. - */ - ret = walk_page_range(start_vaddr, end_vaddr, - &pagemap_walk); - if (ret == PM_END_OF_BUFFER) - ret = 0; - /* don't need mmap_sem for these, but this looks cleaner */ - *ppos += (char *)pm.out - buf; - if (!ret) - ret = (char *)pm.out - buf; - } + pagemap_walk.pmd_entry = pagemap_pte_range; + pagemap_walk.pte_hole = pagemap_pte_hole; + pagemap_walk.mm = mm; + pagemap_walk.private = ± + + src = *ppos; + svpfn = src / PM_ENTRY_BYTES; + start_vaddr = svpfn << PAGE_SHIFT; + end_vaddr = TASK_SIZE_OF(task); + + /* watch out for wraparound */ + if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) + start_vaddr = end_vaddr; + + /* + * The odds are that this will stop walking way + * before end_vaddr, because the length of the + * user buffer is tracked in "pm", and the walk + * will stop when we hit the end of the buffer. + */ + ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); + if (ret == PM_END_OF_BUFFER) + ret = 0; + /* don't need mmap_sem for these, but this looks cleaner */ + *ppos += (char *)pm.out - buf; + if (!ret) + ret = (char *)pm.out - buf; out_pages: for (; pagecount; pagecount--) { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index ed424d708e69..1d40f2bd1970 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2165,8 +2165,10 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); inode->i_version++; diff --git a/fs/udf/super.c b/fs/udf/super.c index 7a5f69be6ac2..44cc702f96cc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -682,38 +682,26 @@ static int udf_vrs(struct super_block *sb, int silent) /* * Check whether there is an anchor block in the given block */ -static int udf_check_anchor_block(struct super_block *sb, sector_t block, - bool varconv) +static int udf_check_anchor_block(struct super_block *sb, sector_t block) { - struct buffer_head *bh = NULL; - tag *t; + struct buffer_head *bh; uint16_t ident; - uint32_t location; - if (varconv) { - if (udf_fixed_to_variable(block) >= - sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) - return 0; - bh = sb_bread(sb, udf_fixed_to_variable(block)); - } - else - bh = sb_bread(sb, block); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && + udf_fixed_to_variable(block) >= + sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) + return 0; + bh = udf_read_tagged(sb, block, block, &ident); if (!bh) return 0; - - t = (tag *)bh->b_data; - ident = le16_to_cpu(t->tagIdent); - location = le32_to_cpu(t->tagLocation); brelse(bh); - if (ident != TAG_IDENT_AVDP) - return 0; - return location == block; + + return ident == TAG_IDENT_AVDP; } /* Search for an anchor volume descriptor pointer */ -static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, - sector_t lastblock) +static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock) { sector_t last[6]; int i; @@ -739,7 +727,7 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, sb->s_blocksize_bits) continue; - if (udf_check_anchor_block(sb, last[i], varconv)) { + if (udf_check_anchor_block(sb, last[i])) { sbi->s_anchor[0] = last[i]; sbi->s_anchor[1] = last[i] - 256; return last[i]; @@ -748,17 +736,17 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, if (last[i] < 256) continue; - if (udf_check_anchor_block(sb, last[i] - 256, varconv)) { + if (udf_check_anchor_block(sb, last[i] - 256)) { sbi->s_anchor[1] = last[i] - 256; return last[i]; } } - if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) { + if (udf_check_anchor_block(sb, sbi->s_session + 256)) { sbi->s_anchor[0] = sbi->s_session + 256; return last[0]; } - if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) { + if (udf_check_anchor_block(sb, sbi->s_session + 512)) { sbi->s_anchor[0] = sbi->s_session + 512; return last[0]; } @@ -780,23 +768,24 @@ static void udf_find_anchor(struct super_block *sb) int i; struct udf_sb_info *sbi = UDF_SB(sb); - lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block); + lastblock = udf_scan_anchors(sb, sbi->s_last_block); if (lastblock) goto check_anchor; /* No anchor found? Try VARCONV conversion of block numbers */ + UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); /* Firstly, we try to not convert number of the last block */ - lastblock = udf_scan_anchors(sb, 1, + lastblock = udf_scan_anchors(sb, udf_variable_to_fixed(sbi->s_last_block)); - if (lastblock) { - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + if (lastblock) goto check_anchor; - } /* Secondly, we try with converted number of the last block */ - lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block); - if (lastblock) - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + lastblock = udf_scan_anchors(sb, sbi->s_last_block); + if (!lastblock) { + /* VARCONV didn't help. Clear it. */ + UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV); + } check_anchor: /* diff --git a/fs/utimes.c b/fs/utimes.c index af059d5cb485..b6b664e7145e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -40,14 +40,9 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) #endif -static bool nsec_special(long nsec) -{ - return nsec == UTIME_OMIT || nsec == UTIME_NOW; -} - static bool nsec_valid(long nsec) { - if (nsec_special(nsec)) + if (nsec == UTIME_OMIT || nsec == UTIME_NOW) return true; return nsec >= 0 && nsec <= 999999999; @@ -102,7 +97,11 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (error) goto dput_and_out; - /* Don't worry, the checks are done in inode_change_ok() */ + if (times && times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + times = NULL; + + /* In most cases, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { error = -EPERM; @@ -124,28 +123,34 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - } - /* - * If times is NULL or both times are either UTIME_OMIT or - * UTIME_NOW, then need to check permissions, because - * inode_change_ok() won't do it. - */ - if (!times || (nsec_special(times[0].tv_nsec) && - nsec_special(times[1].tv_nsec))) { + /* + * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT + * cases, we need to make an extra check that is not done by + * inode_change_ok(). + */ + if (((times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_OMIT) + || + (times[0].tv_nsec == UTIME_OMIT && + times[1].tv_nsec == UTIME_NOW)) + && !is_owner_or_cap(inode)) + goto mnt_drop_write_and_out; + } else { + + /* + * If times is NULL (or both times are UTIME_NOW), + * then we need to check permissions, because + * inode_change_ok() won't do it. + */ error = -EACCES; if (IS_IMMUTABLE(inode)) goto mnt_drop_write_and_out; if (!is_owner_or_cap(inode)) { - if (f) { - if (!(f->f_mode & FMODE_WRITE)) - goto mnt_drop_write_and_out; - } else { - error = vfs_permission(&nd, MAY_WRITE); - if (error) - goto mnt_drop_write_and_out; - } + error = permission(inode, MAY_WRITE, NULL); + if (error) + goto mnt_drop_write_and_out; } } mutex_lock(&inode->i_mutex); @@ -169,14 +174,6 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __ if (utimes) { if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) return -EFAULT; - if ((tstimes[0].tv_nsec == UTIME_OMIT || - tstimes[0].tv_nsec == UTIME_NOW) && - tstimes[0].tv_sec != 0) - return -EINVAL; - if ((tstimes[1].tv_nsec == UTIME_OMIT || - tstimes[1].tv_nsec == UTIME_NOW) && - tstimes[1].tv_sec != 0) - return -EINVAL; /* Nothing to do, we must not even check the path. */ if (tstimes[0].tv_nsec == UTIME_OMIT && diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index 92a6d91d0c1a..7cd25b8e7c9a 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -1,6 +1,6 @@ header-y += kvm.h -ifeq ($(wildcard include/asm-$(SRCARCH)/a.out.h),include/asm-$(SRCARCH)/a.out.h) +ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),) unifdef-y += a.out.h endif unifdef-y += auxvec.h diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 85fd0aa27a8c..4ec0a296bdec 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -2,7 +2,7 @@ #define _ASM_GENERIC_ATOMIC_H /* * Copyright (C) 2005 Silicon Graphics, Inc. - * Christoph Lameter <[email protected]> + * Christoph Lameter * * Allows to provide arch independent atomic definitions without the need to * edit all arch specific atomic.h files. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 44ef329531c3..4fce3db2cecc 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -197,6 +197,63 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd) } #endif /* CONFIG_MMU */ +static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + /* + * Get the current pte state, but zero it out to make it + * non-present, preventing the hardware from asynchronously + * updating it. + */ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void __ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pte_t pte) +{ + /* + * The pte is non-present, so there's no hardware state to + * preserve. + */ + set_pte_at(mm, addr, ptep, pte); +} + +#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +/* + * Start a pte protection read-modify-write transaction, which + * protects against asynchronous hardware modifications to the pte. + * The intention is not to prevent the hardware from making pte + * updates, but to prevent any updates it may make from being lost. + * + * This does not protect against other software modifications of the + * pte; the appropriate pte lock must be held over the transation. + * + * Note that this interface is intended to be batchable, meaning that + * ptep_modify_prot_commit may not actually update the pte, but merely + * queue the update to be done at some later time. The update must be + * actually committed before the pte lock is released, however. + */ +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + return __ptep_modify_prot_start(mm, addr, ptep); +} + +/* + * Commit an update to a pte, leaving any hardware-controlled bits in + * the PTE unmodified. + */ +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pte_t pte) +{ + __ptep_modify_prot_commit(mm, addr, ptep, pte); +} +#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ + /* * A facility to provide lazy MMU batching. This allows PTE updates and * page invalidations to be delayed until a call to leave lazy MMU mode diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778e916c..f1992dc5c424 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -93,6 +93,8 @@ VMLINUX_SYMBOL(__end_rio_route_ops) = .; \ } \ \ + TRACEDATA \ + \ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ @@ -318,6 +320,18 @@ __stop___bug_table = .; \ } +#ifdef CONFIG_PM_TRACE +#define TRACEDATA \ + . = ALIGN(4); \ + .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { \ + __tracedata_start = .; \ + *(.tracedata) \ + __tracedata_end = .; \ + } +#else +#define TRACEDATA +#endif + #define NOTES \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_notes) = .; \ diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h index a798d6299a79..1232be3885b0 100644 --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -283,10 +283,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " beqz %0, 2f \n" " subu %0, %1, %3 \n" " .set reorder \n" - "1: \n" " .subsection 2 \n" "2: b 1b \n" " .previous \n" + "1: \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) @@ -664,10 +664,10 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " beqz %0, 2f \n" " dsubu %0, %1, %3 \n" " .set reorder \n" - "1: \n" " .subsection 2 \n" "2: b 1b \n" " .previous \n" + "1: \n" " .set mips0 \n" : "=&r" (result), "=&r" (temp), "=m" (v->counter) : "Ir" (i), "m" (v->counter) diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild index 7381916dfcbb..bca352e033c3 100644 --- a/include/asm-powerpc/Kbuild +++ b/include/asm-powerpc/Kbuild @@ -1,6 +1,5 @@ include include/asm-generic/Kbuild.asm -header-y += a.out.h header-y += auxvec.h header-y += ioctls.h header-y += mman.h diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index 14411c9de46f..635d764dc13e 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -28,6 +28,7 @@ #include <asm/numa.h> #include <asm/processor.h> #include <asm/mmu.h> +#include <asm/mpspec.h> #define COMPILER_DEPENDENT_INT64 long long #define COMPILER_DEPENDENT_UINT64 unsigned long long @@ -160,9 +161,7 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; extern int acpi_scan_nodes(unsigned long start, unsigned long end); -#ifdef CONFIG_X86_64 -# define NR_NODE_MEMBLKS (MAX_NUMNODES*2) -#endif +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) extern void acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes); #else diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h new file mode 100644 index 000000000000..30a12049353b --- /dev/null +++ b/include/asm-x86/amd_iommu.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <[email protected]> + * Leo Duran <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_H +#define _ASM_X86_AMD_IOMMU_H + +#ifdef CONFIG_AMD_IOMMU +extern int amd_iommu_init(void); +extern int amd_iommu_init_dma_ops(void); +extern void amd_iommu_detect(void); +#else +static inline int amd_iommu_init(void) { return -ENODEV; } +static inline void amd_iommu_detect(void) { } +#endif + +#endif diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h new file mode 100644 index 000000000000..7bfcb47cc452 --- /dev/null +++ b/include/asm-x86/amd_iommu_types.h @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <[email protected]> + * Leo Duran <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __AMD_IOMMU_TYPES_H__ +#define __AMD_IOMMU_TYPES_H__ + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/spinlock.h> + +/* + * some size calculation constants + */ +#define DEV_TABLE_ENTRY_SIZE 256 +#define ALIAS_TABLE_ENTRY_SIZE 2 +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* helper macros */ +#define LOW_U32(x) ((x) & ((1ULL << 32)-1)) +#define HIGH_U32(x) (LOW_U32((x) >> 32)) + +/* Length of the MMIO region for the AMD IOMMU */ +#define MMIO_REGION_LENGTH 0x4000 + +/* Capability offsets used by the driver */ +#define MMIO_CAP_HDR_OFFSET 0x00 +#define MMIO_RANGE_OFFSET 0x0c + +/* Masks, shifts and macros to parse the device range capability */ +#define MMIO_RANGE_LD_MASK 0xff000000 +#define MMIO_RANGE_FD_MASK 0x00ff0000 +#define MMIO_RANGE_BUS_MASK 0x0000ff00 +#define MMIO_RANGE_LD_SHIFT 24 +#define MMIO_RANGE_FD_SHIFT 16 +#define MMIO_RANGE_BUS_SHIFT 8 +#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) +#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) +#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) + +/* Flag masks for the AMD IOMMU exclusion range */ +#define MMIO_EXCL_ENABLE_MASK 0x01ULL +#define MMIO_EXCL_ALLOW_MASK 0x02ULL + +/* Used offsets into the MMIO space */ +#define MMIO_DEV_TABLE_OFFSET 0x0000 +#define MMIO_CMD_BUF_OFFSET 0x0008 +#define MMIO_EVT_BUF_OFFSET 0x0010 +#define MMIO_CONTROL_OFFSET 0x0018 +#define MMIO_EXCL_BASE_OFFSET 0x0020 +#define MMIO_EXCL_LIMIT_OFFSET 0x0028 +#define MMIO_CMD_HEAD_OFFSET 0x2000 +#define MMIO_CMD_TAIL_OFFSET 0x2008 +#define MMIO_EVT_HEAD_OFFSET 0x2010 +#define MMIO_EVT_TAIL_OFFSET 0x2018 +#define MMIO_STATUS_OFFSET 0x2020 + +/* feature control bits */ +#define CONTROL_IOMMU_EN 0x00ULL +#define CONTROL_HT_TUN_EN 0x01ULL +#define CONTROL_EVT_LOG_EN 0x02ULL +#define CONTROL_EVT_INT_EN 0x03ULL +#define CONTROL_COMWAIT_EN 0x04ULL +#define CONTROL_PASSPW_EN 0x08ULL +#define CONTROL_RESPASSPW_EN 0x09ULL +#define CONTROL_COHERENT_EN 0x0aULL +#define CONTROL_ISOC_EN 0x0bULL +#define CONTROL_CMDBUF_EN 0x0cULL +#define CONTROL_PPFLOG_EN 0x0dULL +#define CONTROL_PPFINT_EN 0x0eULL + +/* command specific defines */ +#define CMD_COMPL_WAIT 0x01 +#define CMD_INV_DEV_ENTRY 0x02 +#define CMD_INV_IOMMU_PAGES 0x03 + +#define CMD_COMPL_WAIT_STORE_MASK 0x01 +#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 +#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 + +#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL + +/* macros and definitions for device table entries */ +#define DEV_ENTRY_VALID 0x00 +#define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_IR 0x3d +#define DEV_ENTRY_IW 0x3e +#define DEV_ENTRY_EX 0x67 +#define DEV_ENTRY_SYSMGT1 0x68 +#define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_INIT_PASS 0xb8 +#define DEV_ENTRY_EINT_PASS 0xb9 +#define DEV_ENTRY_NMI_PASS 0xba +#define DEV_ENTRY_LINT0_PASS 0xbe +#define DEV_ENTRY_LINT1_PASS 0xbf + +/* constants to configure the command buffer */ +#define CMD_BUFFER_SIZE 8192 +#define CMD_BUFFER_ENTRIES 512 +#define MMIO_CMD_SIZE_SHIFT 56 +#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) + +#define PAGE_MODE_1_LEVEL 0x01 +#define PAGE_MODE_2_LEVEL 0x02 +#define PAGE_MODE_3_LEVEL 0x03 + +#define IOMMU_PDE_NL_0 0x000ULL +#define IOMMU_PDE_NL_1 0x200ULL +#define IOMMU_PDE_NL_2 0x400ULL +#define IOMMU_PDE_NL_3 0x600ULL + +#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) +#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) +#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) + +#define IOMMU_MAP_SIZE_L1 (1ULL << 21) +#define IOMMU_MAP_SIZE_L2 (1ULL << 30) +#define IOMMU_MAP_SIZE_L3 (1ULL << 39) + +#define IOMMU_PTE_P (1ULL << 0) +#define IOMMU_PTE_U (1ULL << 59) +#define IOMMU_PTE_FC (1ULL << 60) +#define IOMMU_PTE_IR (1ULL << 61) +#define IOMMU_PTE_IW (1ULL << 62) + +#define IOMMU_L1_PDE(address) \ + ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) +#define IOMMU_L2_PDE(address) \ + ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) + +#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) +#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) + +#define IOMMU_PROT_MASK 0x03 +#define IOMMU_PROT_IR 0x01 +#define IOMMU_PROT_IW 0x02 + +/* IOMMU capabilities */ +#define IOMMU_CAP_IOTLB 24 +#define IOMMU_CAP_NPCACHE 26 + +#define MAX_DOMAIN_ID 65536 + +struct protection_domain { + spinlock_t lock; + u16 id; + int mode; + u64 *pt_root; + void *priv; +}; + +struct dma_ops_domain { + struct list_head list; + struct protection_domain domain; + unsigned long aperture_size; + unsigned long next_bit; + unsigned long *bitmap; + u64 **pte_pages; +}; + +struct amd_iommu { + struct list_head list; + spinlock_t lock; + + u16 devid; + u16 cap_ptr; + + u64 mmio_phys; + u8 *mmio_base; + u32 cap; + u16 first_device; + u16 last_device; + u64 exclusion_start; + u64 exclusion_length; + + u8 *cmd_buf; + u32 cmd_buf_size; + + int need_sync; + + struct dma_ops_domain *default_dom; +}; + +extern struct list_head amd_iommu_list; + +struct dev_table_entry { + u32 data[8]; +}; + +struct unity_map_entry { + struct list_head list; + u16 devid_start; + u16 devid_end; + u64 address_start; + u64 address_end; + int prot; +}; + +extern struct list_head amd_iommu_unity_map; + +/* data structures for device handling */ +extern struct dev_table_entry *amd_iommu_dev_table; +extern u16 *amd_iommu_alias_table; +extern struct amd_iommu **amd_iommu_rlookup_table; + +extern unsigned amd_iommu_aperture_order; + +extern u16 amd_iommu_last_bdf; + +/* data structures for protection domain handling */ +extern struct protection_domain **amd_iommu_pd_table; +extern unsigned long *amd_iommu_pd_alloc_bitmap; + +extern int amd_iommu_isolate; + +static inline void print_devid(u16 devid, int nl) +{ + int bus = devid >> 8; + int dev = devid >> 3 & 0x1f; + int fn = devid & 0x07; + + printk("%02x:%02x.%x", bus, dev, fn); + if (nl) + printk("\n"); +} + +#endif diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index be9639a9a186..4e2c1e517f06 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -36,15 +36,11 @@ extern void generic_apic_probe(void); #ifdef CONFIG_X86_LOCAL_APIC extern int apic_verbosity; -extern int timer_over_8254; extern int local_apic_timer_c2_ok; -extern int local_apic_timer_disabled; -extern int apic_runs_main_timer; extern int ioapic_force; -extern int disable_apic; -extern int disable_apic_timer; +extern int disable_apic; /* * Basic functions accessing APICs. */ @@ -125,16 +121,22 @@ extern void enable_NMI_through_LVT0(void); */ #ifdef CONFIG_X86_64 extern void early_init_lapic_mapping(void); +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} #endif extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); -extern int apic_is_clustered_box(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 +static inline void init_apic_mappings(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h index 90dec0c23646..97220321f39d 100644 --- a/include/asm-x86/asm.h +++ b/include/asm-x86/asm.h @@ -1,37 +1,40 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H -#ifdef CONFIG_X86_32 -/* 32 bits */ - -# define _ASM_PTR " .long " -# define _ASM_ALIGN " .balign 4 " -# define _ASM_MOV_UL " movl " - -# define _ASM_INC " incl " -# define _ASM_DEC " decl " -# define _ASM_ADD " addl " -# define _ASM_SUB " subl " -# define _ASM_XADD " xaddl " - +#ifdef __ASSEMBLY__ +# define __ASM_FORM(x) x +# define __ASM_EX_SEC .section __ex_table #else -/* 64 bits */ +# define __ASM_FORM(x) " " #x " " +# define __ASM_EX_SEC " .section __ex_table,\"a\"\n" +#endif -# define _ASM_PTR " .quad " -# define _ASM_ALIGN " .balign 8 " -# define _ASM_MOV_UL " movq " - -# define _ASM_INC " incq " -# define _ASM_DEC " decq " -# define _ASM_ADD " addq " -# define _ASM_SUB " subq " -# define _ASM_XADD " xaddq " - -#endif /* CONFIG_X86_32 */ +#ifdef CONFIG_X86_32 +# define __ASM_SEL(a,b) __ASM_FORM(a) +#else +# define __ASM_SEL(a,b) __ASM_FORM(b) +#endif + +#define __ASM_SIZE(inst) __ASM_SEL(inst##l, inst##q) +#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) + +#define _ASM_PTR __ASM_SEL(.long, .quad) +#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) +#define _ASM_MOV_UL __ASM_SIZE(mov) + +#define _ASM_INC __ASM_SIZE(inc) +#define _ASM_DEC __ASM_SIZE(dec) +#define _ASM_ADD __ASM_SIZE(add) +#define _ASM_SUB __ASM_SIZE(sub) +#define _ASM_XADD __ASM_SIZE(xadd) +#define _ASM_AX __ASM_REG(ax) +#define _ASM_BX __ASM_REG(bx) +#define _ASM_CX __ASM_REG(cx) +#define _ASM_DX __ASM_REG(dx) /* Exception table entry */ # define _ASM_EXTABLE(from,to) \ - " .section __ex_table,\"a\"\n" \ + __ASM_EX_SEC \ _ASM_ALIGN "\n" \ _ASM_PTR #from "," #to "\n" \ " .previous\n" diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h index 3e0cd7d38335..a0095191c02e 100644 --- a/include/asm-x86/atomic_64.h +++ b/include/asm-x86/atomic_64.h @@ -11,12 +11,6 @@ * resource counting etc.. */ -#ifdef CONFIG_SMP -#define LOCK "lock ; " -#else -#define LOCK "" -#endif - /* * Make sure gcc doesn't try to be clever and move things around * on us. We need to use _exactly_ the address the user gave us, @@ -431,6 +425,32 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) return c != (u); } +/** + * atomic_inc_short - increment of a short integer + * @v: pointer to type int + * + * Atomically adds 1 to @v + * Returns the new value of @u + */ +static inline short int atomic_inc_short(short int *v) +{ + asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); + return *v; +} + +/** + * atomic_or_long - OR of two long integers + * @v1: pointer to type unsigned long + * @v2: pointer to type unsigned long + * + * Atomically ORs @v1 and @v2 + * Returns the result of the OR + */ +static inline void atomic_or_long(unsigned long *v1, unsigned long v2) +{ + asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2)); +} + #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) /* These are x86-specific, used by some header files */ diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h index b4a46b7be794..0033e50c13b2 100644 --- a/include/asm-x86/bios_ebda.h +++ b/include/asm-x86/bios_ebda.h @@ -14,4 +14,6 @@ static inline unsigned int get_bios_ebda(void) return address; /* 0 means none */ } +void reserve_ebda_region(void); + #endif /* _MACH_BIOS_EBDA_H */ diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index ee4b3ead6a43..96b1829cea15 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -23,11 +23,21 @@ #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) /* Technically wrong, but this avoids compilation errors on some gcc versions. */ -#define ADDR "=m" (*(volatile long *) addr) +#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) #else -#define ADDR "+m" (*(volatile long *) addr) +#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) #endif +#define ADDR BITOP_ADDR(addr) + +/* + * We do the locked ops that don't return the old value as + * a mask operation on a byte. + */ +#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK(nr) (1 << ((nr) & 7)) + /** * set_bit - Atomically set a bit in memory * @nr: the bit to set @@ -43,9 +53,17 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(int nr, volatile void *addr) +static inline void set_bit(unsigned int nr, volatile unsigned long *addr) { - asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "orb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)CONST_MASK(nr)) + : "memory"); + } else { + asm volatile(LOCK_PREFIX "bts %1,%0" + : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + } } /** @@ -57,7 +75,7 @@ static inline void set_bit(int nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(int nr, volatile void *addr) +static inline void __set_bit(int nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -72,9 +90,17 @@ static inline void __set_bit(int nr, volatile void *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile void *addr) +static inline void clear_bit(int nr, volatile unsigned long *addr) { - asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "andb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)~CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btr %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } } /* @@ -85,13 +111,13 @@ static inline void clear_bit(int nr, volatile void *addr) * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ -static inline void clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } -static inline void __clear_bit(int nr, volatile void *addr) +static inline void __clear_bit(int nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } @@ -108,7 +134,7 @@ static inline void __clear_bit(int nr, volatile void *addr) * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ -static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); @@ -126,7 +152,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(int nr, volatile void *addr) +static inline void __change_bit(int nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } @@ -140,7 +166,7 @@ static inline void __change_bit(int nr, volatile void *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(int nr, volatile void *addr) +static inline void change_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); } @@ -153,7 +179,7 @@ static inline void change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_set_bit(int nr, volatile void *addr) +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -170,7 +196,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile void *addr) +static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -184,7 +210,7 @@ static inline int test_and_set_bit_lock(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(int nr, volatile void *addr) +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -203,7 +229,7 @@ static inline int __test_and_set_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_clear_bit(int nr, volatile void *addr) +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -223,7 +249,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_clear_bit(int nr, volatile void *addr) +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -235,7 +261,7 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, volatile void *addr) +static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -255,7 +281,7 @@ static inline int __test_and_change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_change_bit(int nr, volatile void *addr) +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -266,13 +292,13 @@ static inline int test_and_change_bit(int nr, volatile void *addr) return oldbit; } -static inline int constant_test_bit(int nr, const volatile void *addr) +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; } -static inline int variable_test_bit(int nr, volatile const void *addr) +static inline int variable_test_bit(int nr, volatile const unsigned long *addr) { int oldbit; diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index f62f4733606b..ae22bdf0ab14 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -11,6 +11,7 @@ /* setup data types */ #define SETUP_NONE 0 +#define SETUP_E820_EXT 1 /* extensible setup data list node */ struct setup_data { @@ -40,6 +41,7 @@ struct setup_header { __u8 type_of_loader; __u8 loadflags; #define LOADED_HIGH (1<<0) +#define QUIET_FLAG (1<<5) #define KEEP_SEGMENTS (1<<6) #define CAN_USE_HEAP (1<<7) __u16 setup_move_size; diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h index d9b26b9a28cf..17463ccf8166 100644 --- a/include/asm-x86/cmpxchg_64.h +++ b/include/asm-x86/cmpxchg_64.h @@ -93,6 +93,39 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return old; } +/* + * Always use locked operations when touching memory shared with a + * hypervisor, since the system may be SMP even if the guest kernel + * isn't. + */ +static inline unsigned long __sync_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + asm volatile("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + asm volatile("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + asm volatile("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + static inline unsigned long __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, int size) @@ -139,6 +172,10 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ (unsigned long)(n), \ sizeof(*(ptr)))) +#define sync_cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) #define cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 0d609c837a41..84a56da397b1 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -106,6 +106,7 @@ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ #define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -142,11 +143,11 @@ extern const char * const x86_power_flags[32]; #define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) #define setup_clear_cpu_cap(bit) do { \ clear_cpu_cap(&boot_cpu_data, bit); \ - set_bit(bit, cleared_cpu_caps); \ + set_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ - clear_bit(bit, cleared_cpu_caps); \ + clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h index d2526d3f7346..7515c19d4988 100644 --- a/include/asm-x86/current.h +++ b/include/asm-x86/current.h @@ -1,5 +1,39 @@ +#ifndef _X86_CURRENT_H +#define _X86_CURRENT_H + #ifdef CONFIG_X86_32 -# include "current_32.h" -#else -# include "current_64.h" -#endif +#include <linux/compiler.h> +#include <asm/percpu.h> + +struct task_struct; + +DECLARE_PER_CPU(struct task_struct *, current_task); +static __always_inline struct task_struct *get_current(void) +{ + return x86_read_percpu(current_task); +} + +#else /* X86_32 */ + +#ifndef __ASSEMBLY__ +#include <asm/pda.h> + +struct task_struct; + +static __always_inline struct task_struct *get_current(void) +{ + return read_pda(pcurrent); +} + +#else /* __ASSEMBLY__ */ + +#include <asm/asm-offsets.h> +#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg + +#endif /* __ASSEMBLY__ */ + +#endif /* X86_32 */ + +#define current get_current() + +#endif /* X86_CURRENT_H */ diff --git a/include/asm-x86/current_32.h b/include/asm-x86/current_32.h deleted file mode 100644 index 5af9bdb97a16..000000000000 --- a/include/asm-x86/current_32.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _I386_CURRENT_H -#define _I386_CURRENT_H - -#include <linux/compiler.h> -#include <asm/percpu.h> - -struct task_struct; - -DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return x86_read_percpu(current_task); -} - -#define current get_current() - -#endif /* !(_I386_CURRENT_H) */ diff --git a/include/asm-x86/current_64.h b/include/asm-x86/current_64.h deleted file mode 100644 index 2d368ede2fc1..000000000000 --- a/include/asm-x86/current_64.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _X86_64_CURRENT_H -#define _X86_64_CURRENT_H - -#if !defined(__ASSEMBLY__) -struct task_struct; - -#include <asm/pda.h> - -static inline struct task_struct *get_current(void) -{ - struct task_struct *t = read_pda(pcurrent); - return t; -} - -#define current get_current() - -#else - -#ifndef ASM_OFFSET_H -#include <asm/asm-offsets.h> -#endif - -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg - -#endif - -#endif /* !(_X86_64_CURRENT_H) */ diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index 268a012bcd79..07f9f2b17be8 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -29,11 +29,17 @@ static inline void fill_ldt(struct desc_struct *desc, extern struct desc_ptr idt_descr; extern gate_desc idt_table[]; +struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); +DECLARE_PER_CPU(struct gdt_page, gdt_page); + +static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) +{ + return per_cpu(gdt_page, cpu).gdt; +} + #ifdef CONFIG_X86_64 -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -extern struct desc_ptr cpu_gdt_descr[]; -/* the cpu gdt accessor */ -#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address) static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, unsigned dpl, unsigned ist, unsigned seg) @@ -51,16 +57,6 @@ static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, } #else -struct gdt_page { - struct desc_struct gdt[GDT_ENTRIES]; -} __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU(struct gdt_page, gdt_page); - -static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) -{ - return per_cpu(gdt_page, cpu).gdt; -} - static inline void pack_gate(gate_desc *gate, unsigned char type, unsigned long base, unsigned dpl, unsigned flags, unsigned short seg) @@ -311,6 +307,28 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + +extern int first_system_vector; +extern char system_vectors[]; + +static inline void alloc_system_vector(int vector) +{ + if (system_vectors[vector] == SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (first_system_vector > vector) + first_system_vector = vector; + } else + BUG(); +} + +static inline void alloc_intr_gate(unsigned int n, void *addr) +{ + alloc_system_vector(n); + set_intr_gate(n, addr); +} + /* * This routine sets up an interrupt gate at directory privilege level 3. */ diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h index eccb4ea1f918..f7bacf357dac 100644 --- a/include/asm-x86/desc_defs.h +++ b/include/asm-x86/desc_defs.h @@ -75,10 +75,14 @@ struct ldttss_desc64 { typedef struct gate_struct64 gate_desc; typedef struct ldttss_desc64 ldt_desc; typedef struct ldttss_desc64 tss_desc; +#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32)) +#define gate_segment(g) ((g).segment) #else typedef struct desc_struct gate_desc; typedef struct desc_struct ldt_desc; typedef struct desc_struct tss_desc; +#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff)) +#define gate_segment(g) ((g).a >> 16) #endif struct desc_ptr { diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h index 4edf7514a750..58a86571fe0f 100644 --- a/include/asm-x86/dmi.h +++ b/include/asm-x86/dmi.h @@ -3,12 +3,6 @@ #include <asm/io.h> -#ifdef CONFIG_X86_32 - -#define dmi_alloc alloc_bootmem - -#else /* CONFIG_X86_32 */ - #define DMI_MAX_DATA 2048 extern int dmi_alloc_index; @@ -25,8 +19,6 @@ static inline void *dmi_alloc(unsigned len) return dmi_alloc_data + idx; } -#endif - /* Use early IO mappings for DMI because it's initialized early */ #define dmi_ioremap early_ioremap #define dmi_iounmap early_iounmap diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 7004251fc66b..78c03d7bf441 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -2,6 +2,41 @@ #define __ASM_E820_H #define E820MAP 0x2d0 /* our map */ #define E820MAX 128 /* number of entries in E820MAP */ + +/* + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the + * constrained space in the zeropage. If we have more nodes than + * that, and if we've booted off EFI firmware, then the EFI tables + * passed us from the EFI firmware can list more nodes. Size our + * internal memory map tables to have room for these additional + * nodes, based on up to three entries per node for which the + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), + * plus E820MAX, allowing space for the possible duplicate E820 + * entries that might need room in the same arrays, prior to the + * call to sanitize_e820_map() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +/* + * Odd: 'make headers_check' complains about numa.h if I try + * to collapse the next two #ifdef lines to a single line: + * #if defined(__KERNEL__) && defined(CONFIG_EFI) + */ +#ifdef __KERNEL__ +#ifdef CONFIG_EFI +#include <linux/numa.h> +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) +#else /* ! CONFIG_EFI */ +#define E820_X_MAX E820MAX +#endif +#else /* ! __KERNEL__ */ +#define E820_X_MAX E820MAX +#endif + #define E820NR 0x1e8 /* # entries in E820MAP */ #define E820_RAM 1 @@ -9,6 +44,9 @@ #define E820_ACPI 3 #define E820_NVS 4 +/* reserved RAM used by kernel itself */ +#define E820_RESERVED_KERN 128 + #ifndef __ASSEMBLY__ struct e820entry { __u64 addr; /* start of memory segment */ @@ -18,22 +56,78 @@ struct e820entry { struct e820map { __u32 nr_map; - struct e820entry map[E820MAX]; + struct e820entry map[E820_X_MAX]; }; + +/* see comment in arch/x86/kernel/e820.c */ +extern struct e820map e820; +extern struct e820map e820_saved; + +extern int e820_any_mapped(u64 start, u64 end, unsigned type); +extern int e820_all_mapped(u64 start, u64 end, unsigned type); +extern void e820_add_region(u64 start, u64 size, int type); +extern void e820_print_map(char *who); +extern int +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); +extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); +extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, + int checktype); +extern void update_e820(void); +extern void e820_setup_gap(void); +extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, + unsigned long start_addr, unsigned long long end_addr); +struct setup_data; +extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data); + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +extern void e820_mark_nosave_regions(unsigned long limit_pfn); +#else +static inline void e820_mark_nosave_regions(unsigned long limit_pfn) +{ +} +#endif + +extern unsigned long end_user_pfn; + +extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); +extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); +extern void reserve_early(u64 start, u64 end, char *name); +extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); +extern void free_early(u64 start, u64 end); +extern void early_res_to_bootmem(u64 start, u64 end); +extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); + +extern unsigned long e820_end(void); +extern int e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn); +extern void e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); +extern u64 e820_hole_size(u64 start, u64 end); +extern void finish_e820_parsing(void); +extern void e820_reserve_resources(void); +extern void setup_memory_map(void); +extern char *default_machine_specific_memory_setup(void); +extern char *machine_specific_memory_setup(void); +extern char *memory_setup(void); + #endif /* __ASSEMBLY__ */ #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 +#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) #define BIOS_BEGIN 0x000a0000 #define BIOS_END 0x00100000 #ifdef __KERNEL__ -#ifdef CONFIG_X86_32 -# include "e820_32.h" -#else -# include "e820_64.h" -#endif +#include <linux/ioport.h> + +#define HIGH_MEMORY (1024*1024) #endif /* __KERNEL__ */ #endif /* __ASM_E820_H */ diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h deleted file mode 100644 index a9f7c6ec32bf..000000000000 --- a/include/asm-x86/e820_32.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * structures and definitions for the int 15, ax=e820 memory map - * scheme. - * - * In a nutshell, arch/i386/boot/setup.S populates a scratch table - * in the empty_zero_block that contains a list of usable address/size - * duples. In arch/i386/kernel/setup.c, this information is - * transferred into the e820map, and in arch/i386/mm/init.c, that - * new information is used to mark pages reserved or not. - * - */ -#ifndef __E820_HEADER -#define __E820_HEADER - -#include <linux/ioport.h> - -#define HIGH_MEMORY (1024*1024) - -#ifndef __ASSEMBLY__ - -extern struct e820map e820; -extern void update_e820(void); - -extern int e820_all_mapped(unsigned long start, unsigned long end, - unsigned type); -extern int e820_any_mapped(u64 start, u64 end, unsigned type); -extern void propagate_e820_map(void); -extern void register_bootmem_low_pages(unsigned long max_low_pfn); -extern void add_memory_region(unsigned long long start, - unsigned long long size, int type); -extern void update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); -extern void e820_register_memory(void); -extern void limit_regions(unsigned long long size); -extern void print_memory_map(char *who); -extern void init_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource); - -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -extern void e820_mark_nosave_regions(void); -#else -static inline void e820_mark_nosave_regions(void) -{ -} -#endif - - -#endif/*!__ASSEMBLY__*/ -#endif/*__E820_HEADER*/ diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h deleted file mode 100644 index 71c4d685d30d..000000000000 --- a/include/asm-x86/e820_64.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * structures and definitions for the int 15, ax=e820 memory map - * scheme. - * - * In a nutshell, setup.S populates a scratch table in the - * empty_zero_block that contains a list of usable address/size - * duples. setup.c, this information is transferred into the e820map, - * and in init.c/numa.c, that new information is used to mark pages - * reserved or not. - */ -#ifndef __E820_HEADER -#define __E820_HEADER - -#include <linux/ioport.h> - -#ifndef __ASSEMBLY__ -extern unsigned long find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align); -extern unsigned long find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align); -extern void add_memory_region(unsigned long start, unsigned long size, - int type); -extern void update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); -extern void setup_memory_region(void); -extern void contig_e820_setup(void); -extern unsigned long e820_end_of_ram(void); -extern void e820_reserve_resources(void); -extern void e820_mark_nosave_regions(void); -extern int e820_any_mapped(unsigned long start, unsigned long end, - unsigned type); -extern int e820_all_mapped(unsigned long start, unsigned long end, - unsigned type); -extern int e820_any_non_reserved(unsigned long start, unsigned long end); -extern int is_memory_any_valid(unsigned long start, unsigned long end); -extern int e820_all_non_reserved(unsigned long start, unsigned long end); -extern int is_memory_all_valid(unsigned long start, unsigned long end); -extern unsigned long e820_hole_size(unsigned long start, unsigned long end); - -extern void e820_setup_gap(void); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); - -extern void finish_e820_parsing(void); - -extern struct e820map e820; -extern void update_e820(void); - -extern void reserve_early(unsigned long start, unsigned long end, char *name); -extern void free_early(unsigned long start, unsigned long end); -extern void early_res_to_bootmem(unsigned long start, unsigned long end); - -#endif/*!__ASSEMBLY__*/ - -#endif/*__E820_HEADER*/ diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h index d53004b855cc..7ed2bd7a7f51 100644 --- a/include/asm-x86/efi.h +++ b/include/asm-x86/efi.h @@ -90,7 +90,7 @@ extern void *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ -extern void efi_reserve_bootmem(void); +extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h index 8f232dc5b5fe..7be4733c793e 100644 --- a/include/asm-x86/elf.h +++ b/include/asm-x86/elf.h @@ -83,9 +83,9 @@ extern unsigned int vdso_enabled; (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) #include <asm/processor.h> +#include <asm/system.h> #ifdef CONFIG_X86_32 -#include <asm/system.h> /* for savesegment */ #include <asm/desc.h> #define elf_check_arch(x) elf_check_arch_ia32(x) diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h index 5bd206973dca..44d4f8217349 100644 --- a/include/asm-x86/fixmap.h +++ b/include/asm-x86/fixmap.h @@ -7,7 +7,62 @@ # include "fixmap_64.h" #endif +extern int fixmaps_set; + +void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); +void native_set_fixmap(enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + +#ifndef CONFIG_PARAVIRT +static inline void __set_fixmap(enum fixed_addresses idx, + unsigned long phys, pgprot_t flags) +{ + native_set_fixmap(idx, phys, flags); +} +#endif + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) + +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) + #define clear_fixmap(idx) \ __set_fixmap(idx, 0, __pgprot(0)) +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + +extern void __this_fixmap_does_not_exist(void); + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without translation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static __always_inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. + */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} #endif diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h index 4b96148e90c1..aae2f0501a40 100644 --- a/include/asm-x86/fixmap_32.h +++ b/include/asm-x86/fixmap_32.h @@ -79,10 +79,6 @@ enum fixed_addresses { FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif @@ -103,23 +99,18 @@ enum fixed_addresses { (__end_of_permanent_fixed_addresses & 511), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, FIX_WP_TEST, +#ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif __end_of_fixed_addresses }; -extern void __set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); extern void reserve_top_address(unsigned long reserve); -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) @@ -128,38 +119,5 @@ extern void reserve_top_address(unsigned long reserve); #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) #define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} - #endif /* !__ASSEMBLY__ */ #endif diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h index 355d26a75a82..6a4789d57e6c 100644 --- a/include/asm-x86/fixmap_64.h +++ b/include/asm-x86/fixmap_64.h @@ -12,6 +12,7 @@ #define _ASM_FIXMAP_64_H #include <linux/kernel.h> +#include <asm/acpi.h> #include <asm/apicdef.h> #include <asm/page.h> #include <asm/vsyscall.h> @@ -46,23 +47,32 @@ enum fixed_addresses { FIX_EFI_IO_MAP_LAST_PAGE, FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + MAX_EFI_IO_PAGES - 1, +#ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, +#endif +#ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif + __end_of_permanent_fixed_addresses, + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + * + * We round it up to the next 512 pages boundary so that we + * can have a single pgd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_NESTING 4 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 - + (__end_of_permanent_fixed_addresses & 511), + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, __end_of_fixed_addresses }; -extern void __set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) @@ -71,30 +81,4 @@ extern void __set_fixmap(enum fixed_addresses idx, #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without translation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - #endif diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index eeca2f51fd8f..417f76ea677b 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -22,8 +22,9 @@ extern int gart_iommu_aperture_allowed; extern int gart_iommu_aperture_disabled; extern int fix_aperture; #else -#define gart_iommu_aperture 0 -#define gart_iommu_aperture_allowed 0 +#define gart_iommu_aperture 0 +#define gart_iommu_aperture_allowed 0 +#define gart_iommu_aperture_disabled 1 static inline void early_gart_iommu_check(void) { diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h index b02ea6e17de8..33a73f5ed222 100644 --- a/include/asm-x86/genapic_32.h +++ b/include/asm-x86/genapic_32.h @@ -119,5 +119,10 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define is_uv_system() 0 #define uv_wakeup_secondary(a, b) 1 +#ifdef CONFIG_X86_IO_APIC +extern void force_mask_ioapic_irq_2(void); +#else +static inline void force_mask_ioapic_irq_2(void) { } +#endif #endif diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 1de931b263ce..647e4e5c2580 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -44,4 +44,12 @@ DECLARE_PER_CPU(int, x2apic_extra_bits); extern void uv_cpu_init(void); extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +extern void setup_apic_routing(void); + +#ifdef CONFIG_X86_IO_APIC +extern void force_mask_ioapic_irq_2(void); +#else +static inline void force_mask_ioapic_irq_2(void) { } +#endif + #endif diff --git a/include/asm-x86/hardirq.h b/include/asm-x86/hardirq.h index 314434d664e7..000787df66e6 100644 --- a/include/asm-x86/hardirq.h +++ b/include/asm-x86/hardirq.h @@ -3,3 +3,9 @@ #else # include "hardirq_64.h" #endif + +extern u64 arch_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu arch_irq_stat_cpu + +extern u64 arch_irq_stat(void); +#define arch_irq_stat arch_irq_stat diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h index e153f3b44774..4514b16cc723 100644 --- a/include/asm-x86/highmem.h +++ b/include/asm-x86/highmem.h @@ -74,6 +74,9 @@ struct page *kmap_atomic_to_page(void *ptr); #define flush_cache_kmaps() do { } while (0) +extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index 6a9b4ac59bf7..82f1ac641bd7 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -86,8 +86,8 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); #else /* CONFIG_HPET_TIMER */ static inline int hpet_enable(void) { return 0; } -static inline unsigned long hpet_readl(unsigned long a) { return 0; } static inline int is_hpet_enabled(void) { return 0; } +#define hpet_readl(a) 0 #endif #endif /* ASM_X86_HPET_H */ diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index bf025399d939..18f067c310f7 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -1,5 +1,106 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * moved some of the old arch/i386/kernel/irq.h to here. VY + * + * IRQ/IPI changes taken from work by Thomas Radke + * <[email protected]> + * + * hacked by Andi Kleen for x86-64. + * unified by tglx + */ + +#include <asm/irq_vectors.h> + +#ifndef __ASSEMBLY__ + +#include <linux/percpu.h> +#include <linux/profile.h> +#include <linux/smp.h> + +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/sections.h> + +#define platform_legacy_irq(irq) ((irq) < 16) + +/* Interrupt handlers registered during init_IRQ */ +extern void apic_timer_interrupt(void); +extern void error_interrupt(void); +extern void spurious_interrupt(void); +extern void thermal_interrupt(void); +extern void reschedule_interrupt(void); + +extern void invalidate_interrupt(void); +extern void invalidate_interrupt0(void); +extern void invalidate_interrupt1(void); +extern void invalidate_interrupt2(void); +extern void invalidate_interrupt3(void); +extern void invalidate_interrupt4(void); +extern void invalidate_interrupt5(void); +extern void invalidate_interrupt6(void); +extern void invalidate_interrupt7(void); + +extern void irq_move_cleanup_interrupt(void); +extern void threshold_interrupt(void); + +extern void call_function_interrupt(void); + +/* PIC specific functions */ +extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); +extern int i8259A_irq_pending(unsigned int irq); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); + +/* IOAPIC */ +#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) +extern unsigned long io_apic_irqs; + +extern void init_VISWS_APIC_irqs(void); +extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void print_IO_APIC(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); +extern void setup_ioapic_dest(void); + +#ifdef CONFIG_X86_64 +extern void enable_IO_APIC(void); +#endif + +/* IPI functions */ +extern void send_IPI_self(int vector); +extern void send_IPI(int dest, int vector); + +/* Statistics */ +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +/* EISA */ +extern void eisa_set_level_irq(unsigned int irq); + +/* Voyager functions */ +extern asmlinkage void vic_cpi_interrupt(void); +extern asmlinkage void vic_sys_interrupt(void); +extern asmlinkage void vic_cmn_interrupt(void); +extern asmlinkage void qic_timer_interrupt(void); +extern asmlinkage void qic_invalidate_interrupt(void); +extern asmlinkage void qic_reschedule_interrupt(void); +extern asmlinkage void qic_enable_irq_interrupt(void); +extern asmlinkage void qic_call_function_interrupt(void); + #ifdef CONFIG_X86_32 -# include "hw_irq_32.h" +extern void (*const interrupt[NR_IRQS])(void); #else -# include "hw_irq_64.h" +typedef int vector_irq_t[NR_VECTORS]; +DECLARE_PER_CPU(vector_irq_t, vector_irq); +extern spinlock_t vector_lock; +#endif +extern void setup_vector_irq(int cpu); + +#endif /* !ASSEMBLY_ */ + #endif diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h deleted file mode 100644 index ea88054e03f3..000000000000 --- a/include/asm-x86/hw_irq_32.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * moved some of the old arch/i386/kernel/irq.h to here. VY - * - * IRQ/IPI changes taken from work by Thomas Radke - * <[email protected]> - */ - -#include <linux/profile.h> -#include <asm/atomic.h> -#include <asm/irq.h> -#include <asm/sections.h> - -#define NMI_VECTOR 0x02 - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ - -extern void (*const interrupt[NR_IRQS])(void); - -#ifdef CONFIG_SMP -void reschedule_interrupt(void); -void invalidate_interrupt(void); -void call_function_interrupt(void); -#endif - -#ifdef CONFIG_X86_LOCAL_APIC -void apic_timer_interrupt(void); -void error_interrupt(void); -void spurious_interrupt(void); -void thermal_interrupt(void); -#define platform_legacy_irq(irq) ((irq) < 16) -#endif - -void disable_8259A_irq(unsigned int irq); -void enable_8259A_irq(unsigned int irq); -int i8259A_irq_pending(unsigned int irq); -void make_8259A_irq(unsigned int irq); -void init_8259A(int aeoi); -void send_IPI_self(int vector); -void init_VISWS_APIC_irqs(void); -void setup_IO_APIC(void); -void disable_IO_APIC(void); -void print_IO_APIC(void); -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -void send_IPI(int dest, int vector); -void setup_ioapic_dest(void); - -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) - -#endif /* _ASM_HW_IRQ_H */ diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h deleted file mode 100644 index 0062ef390f67..000000000000 --- a/include/asm-x86/hw_irq_64.h +++ /dev/null @@ -1,173 +0,0 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * moved some of the old arch/i386/kernel/irq.h to here. VY - * - * IRQ/IPI changes taken from work by Thomas Radke - * <[email protected]> - * - * hacked by Andi Kleen for x86-64. - */ - -#ifndef __ASSEMBLY__ -#include <asm/atomic.h> -#include <asm/irq.h> -#include <linux/profile.h> -#include <linux/smp.h> -#include <linux/percpu.h> -#endif - -#define NMI_VECTOR 0x02 -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define IA32_SYSCALL_VECTOR 0x80 - - -/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering - * cleanup after irq migration. - */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - -/* - * Vectors 0x30-0x3f are used for ISA interrupts. - */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define RESCHEDULE_VECTOR 0xfd -#define CALL_FUNCTION_VECTOR 0xfc -/* fb free - please don't readd KDB here because it's useless - (hint - think what a NMI bit does to a vector) */ -#define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -/* f8 free */ -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -#define NUM_INVALIDATE_TLB_VECTORS 8 - -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x41 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ - - -#ifndef __ASSEMBLY__ - -/* Interrupt handlers registered during init_IRQ */ -void apic_timer_interrupt(void); -void spurious_interrupt(void); -void error_interrupt(void); -void reschedule_interrupt(void); -void call_function_interrupt(void); -void irq_move_cleanup_interrupt(void); -void invalidate_interrupt0(void); -void invalidate_interrupt1(void); -void invalidate_interrupt2(void); -void invalidate_interrupt3(void); -void invalidate_interrupt4(void); -void invalidate_interrupt5(void); -void invalidate_interrupt6(void); -void invalidate_interrupt7(void); -void thermal_interrupt(void); -void threshold_interrupt(void); -void i8254_timer_resume(void); - -typedef int vector_irq_t[NR_VECTORS]; -DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern void __setup_vector_irq(int cpu); -extern spinlock_t vector_lock; - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ - -extern void disable_8259A_irq(unsigned int irq); -extern void enable_8259A_irq(unsigned int irq); -extern int i8259A_irq_pending(unsigned int irq); -extern void make_8259A_irq(unsigned int irq); -extern void init_8259A(int aeoi); -extern void send_IPI_self(int vector); -extern void init_VISWS_APIC_irqs(void); -extern void setup_IO_APIC(void); -extern void enable_IO_APIC(void); -extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); -extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -extern void send_IPI(int dest, int vector); -extern void setup_ioapic_dest(void); -extern void native_init_IRQ(void); - -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) - -#include <asm/ptrace.h> - -#define IRQ_NAME2(nr) nr##_interrupt(void) -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -/* - * SMP has a few special interrupts for IPI messages - */ - -#define BUILD_IRQ(nr) \ - asmlinkage void IRQ_NAME(nr); \ - asm("\n.p2align\n" \ - "IRQ" #nr "_interrupt:\n\t" \ - "push $~(" #nr ") ; " \ - "jmp common_interrupt"); - -#define platform_legacy_irq(irq) ((irq) < 16) - -#endif - -#endif /* _ASM_HW_IRQ_H */ diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 45d4df3e51e6..2f98df91f1f2 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port) udelay(2); } +extern struct irq_chip i8259A_chip; + #endif /* __ASM_I8259_H__ */ diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index d5b11f60dbd0..bf5d629b3a39 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -3,6 +3,76 @@ #define ARCH_HAS_IOREMAP_WC +#include <linux/compiler.h> + +/* + * early_ioremap() and early_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + * A boot-time mapping is currently limited to at most 16 pages. + */ +#ifndef __ASSEMBLY__ +extern void early_ioremap_init(void); +extern void early_ioremap_clear(void); +extern void early_ioremap_reset(void); +extern void *early_ioremap(unsigned long offset, unsigned long size); +extern void early_iounmap(void *addr, unsigned long size); +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); +#endif + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "q", :"memory") +build_mmio_read(readw, "w", unsigned short, "r", :"memory") +build_mmio_read(readl, "l", unsigned int, "r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "q", ) +build_mmio_read(__readw, "w", unsigned short, "r", ) +build_mmio_read(__readl, "l", unsigned int, "r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#define mmiowb() barrier() + +#ifdef CONFIG_X86_64 +build_mmio_read(readq, "q", unsigned long, "r", :"memory") +build_mmio_read(__readq, "q", unsigned long, "r", ) +build_mmio_write(writeq, "q", unsigned long, "r", :"memory") +build_mmio_write(__writeq, "q", unsigned long, "r", ) + +#define readq_relaxed(a) __readq(a) +#define __raw_readq __readq +#define __raw_writeq writeq + +/* Let people know we have them */ +#define readq readq +#define writeq writeq +#endif + #ifdef CONFIG_X86_32 # include "io_32.h" #else @@ -16,4 +86,17 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); +/* + * early_ioremap() and early_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + * A boot-time mapping is currently limited to at most 16 pages. + */ +extern void early_ioremap_init(void); +extern void early_ioremap_clear(void); +extern void early_ioremap_reset(void); +extern void *early_ioremap(unsigned long offset, unsigned long size); +extern void early_iounmap(void *addr, unsigned long size); +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + + #endif /* _ASM_X86_IO_H */ diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 049e81e797a0..4df44ed54077 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -122,18 +122,6 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) extern void iounmap(volatile void __iomem *addr); /* - * early_ioremap() and early_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. - */ -extern void early_ioremap_init(void); -extern void early_ioremap_clear(void); -extern void early_ioremap_reset(void); -extern void *early_ioremap(unsigned long offset, unsigned long size); -extern void early_iounmap(void *addr, unsigned long size); -extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); - -/* * ISA I/O bus memory addresses are 1:1 with the physical address. */ #define isa_virt_to_bus virt_to_phys @@ -149,55 +137,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -static inline unsigned char readb(const volatile void __iomem *addr) -{ - return *(volatile unsigned char __force *)addr; -} - -static inline unsigned short readw(const volatile void __iomem *addr) -{ - return *(volatile unsigned short __force *)addr; -} - -static inline unsigned int readl(const volatile void __iomem *addr) -{ - return *(volatile unsigned int __force *) addr; -} - -#define readb_relaxed(addr) readb(addr) -#define readw_relaxed(addr) readw(addr) -#define readl_relaxed(addr) readl(addr) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -static inline void writeb(unsigned char b, volatile void __iomem *addr) -{ - *(volatile unsigned char __force *)addr = b; -} - -static inline void writew(unsigned short b, volatile void __iomem *addr) -{ - *(volatile unsigned short __force *)addr = b; -} - -static inline void writel(unsigned int b, volatile void __iomem *addr) -{ - *(volatile unsigned int __force *)addr = b; -} -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define mmiowb() - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index 0930bedf9e4d..ddd8058a5026 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -static inline __u8 __readb(const volatile void __iomem *addr) -{ - return *(__force volatile __u8 *)addr; -} - -static inline __u16 __readw(const volatile void __iomem *addr) -{ - return *(__force volatile __u16 *)addr; -} - -static __always_inline __u32 __readl(const volatile void __iomem *addr) -{ - return *(__force volatile __u32 *)addr; -} - -static inline __u64 __readq(const volatile void __iomem *addr) -{ - return *(__force volatile __u64 *)addr; -} - -#define readb(x) __readb(x) -#define readw(x) __readw(x) -#define readl(x) __readl(x) -#define readq(x) __readq(x) -#define readb_relaxed(a) readb(a) -#define readw_relaxed(a) readw(a) -#define readl_relaxed(a) readl(a) -#define readq_relaxed(a) readq(a) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq - -#define mmiowb() - -static inline void __writel(__u32 b, volatile void __iomem *addr) -{ - *(__force volatile __u32 *)addr = b; -} - -static inline void __writeq(__u64 b, volatile void __iomem *addr) -{ - *(__force volatile __u64 *)addr = b; -} - -static inline void __writeb(__u8 b, volatile void __iomem *addr) -{ - *(__force volatile __u8 *)addr = b; -} - -static inline void __writew(__u16 b, volatile void __iomem *addr) -{ - *(__force volatile __u16 *)addr = b; -} - -#define writeq(val, addr) __writeq((val), (addr)) -#define writel(val, addr) __writel((val), (addr)) -#define writew(val, addr) __writew((val), (addr)) -#define writeb(val, addr) __writeb((val), (addr)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq - void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned); diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index d593e14f0341..14f82bbcb5fd 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -11,6 +11,15 @@ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar */ +/* I/O Unit Redirection Table */ +#define IO_APIC_REDIR_VECTOR_MASK 0x000FF +#define IO_APIC_REDIR_DEST_LOGICAL 0x00800 +#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000 +#define IO_APIC_REDIR_SEND_PENDING (1 << 12) +#define IO_APIC_REDIR_REMOTE_IRR (1 << 14) +#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) +#define IO_APIC_REDIR_MASKED (1 << 16) + /* * The structure of the IO-APIC: */ @@ -112,21 +121,32 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 -struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +struct mp_config_ioapic { + unsigned long mp_apicaddr; + unsigned int mp_apicid; + unsigned char mp_type; + unsigned char mp_apicver; + unsigned char mp_flags; +}; + +struct mp_config_intsrc { + unsigned int mp_dstapic; + unsigned char mp_type; + unsigned char mp_irqtype; + unsigned short mp_irqflag; + unsigned char mp_srcbus; + unsigned char mp_srcbusirq; + unsigned char mp_dstirq; }; /* I/O APIC entries */ -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; @@ -137,6 +157,9 @@ extern int sis_apic_bug; /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; +/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ +extern int timer_through_8259; + static inline void disable_ioapic_setup(void) { skip_ioapic_setup = 1; @@ -162,6 +185,8 @@ extern void ioapic_init_mappings(void); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 +static const int timer_through_8259 = 0; +static inline void ioapic_init_mappings(void) { } #endif #endif diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index ecc80f341f37..196d63c28aa4 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -20,6 +20,7 @@ #include <asm/hw_irq.h> #include <asm/apic.h> +#include <asm/smp.h> /* * the following functions deal with sending IPIs between CPUs. diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h index 7ba905465a53..1a2925757317 100644 --- a/include/asm-x86/irq.h +++ b/include/asm-x86/irq.h @@ -1,5 +1,50 @@ -#ifdef CONFIG_X86_32 -# include "irq_32.h" +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * <[email protected]> + */ + +#include <asm/apicdef.h> +#include <asm/irq_vectors.h> + +static inline int irq_canonicalize(int irq) +{ + return ((irq == 2) ? 9 : irq); +} + +#ifdef CONFIG_X86_LOCAL_APIC +# define ARCH_HAS_NMI_WATCHDOG +#endif + +#ifdef CONFIG_4KSTACKS + extern void irq_ctx_init(int cpu); + extern void irq_ctx_exit(int cpu); +# define __ARCH_HAS_DO_SOFTIRQ #else -# include "irq_64.h" +# define irq_ctx_init(cpu) do { } while (0) +# define irq_ctx_exit(cpu) do { } while (0) +# ifdef CONFIG_X86_64 +# define __ARCH_HAS_DO_SOFTIRQ +# endif +#endif + +#ifdef CONFIG_IRQBALANCE +extern int irqbalance_disable(char *str); +#endif + +#ifdef CONFIG_HOTPLUG_CPU +#include <linux/cpumask.h> +extern void fixup_irqs(cpumask_t map); #endif + +extern unsigned int do_IRQ(struct pt_regs *regs); +extern void init_IRQ(void); +extern void native_init_IRQ(void); + +/* Interrupt vector management */ +extern DECLARE_BITMAP(used_vectors, NR_VECTORS); + +#endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h deleted file mode 100644 index 0b79f3185243..000000000000 --- a/include/asm-x86/irq_32.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * <[email protected]> - */ - -#include <linux/sched.h> -/* include comes from machine specific directory */ -#include "irq_vectors.h" -#include <asm/thread_info.h> - -static inline int irq_canonicalize(int irq) -{ - return ((irq == 2) ? 9 : irq); -} - -#ifdef CONFIG_X86_LOCAL_APIC -# define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ -#endif - -#ifdef CONFIG_4KSTACKS - extern void irq_ctx_init(int cpu); - extern void irq_ctx_exit(int cpu); -# define __ARCH_HAS_DO_SOFTIRQ -#else -# define irq_ctx_init(cpu) do { } while (0) -# define irq_ctx_exit(cpu) do { } while (0) -#endif - -#ifdef CONFIG_IRQBALANCE -extern int irqbalance_disable(char *str); -#endif - -#ifdef CONFIG_HOTPLUG_CPU -extern void fixup_irqs(cpumask_t map); -#endif - -unsigned int do_IRQ(struct pt_regs *regs); -void init_IRQ(void); -void __init native_init_IRQ(void); - -/* Interrupt vector management */ -extern DECLARE_BITMAP(used_vectors, NR_VECTORS); - -#endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h deleted file mode 100644 index 083d35a62c94..000000000000 --- a/include/asm-x86/irq_64.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * <[email protected]> - */ - -#define TIMER_IRQ 0 - -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ - -/* - * The maximum number of vectors supported by x86_64 processors - * is limited to 256. For processors other than x86_64, NR_VECTORS - * should be changed accordingly. - */ -#define NR_VECTORS 256 - -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ - -#define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) -#define NR_IRQ_VECTORS NR_IRQS - -static inline int irq_canonicalize(int irq) -{ - return ((irq == 2) ? 9 : irq); -} - -#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ - -#ifdef CONFIG_HOTPLUG_CPU -#include <linux/cpumask.h> -extern void fixup_irqs(cpumask_t map); -#endif - -#define __ARCH_HAS_DO_SOFTIRQ 1 - -#endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h new file mode 100644 index 000000000000..b58581e2e24e --- /dev/null +++ b/include/asm-x86/irq_vectors.h @@ -0,0 +1,169 @@ +#ifndef _ASM_IRQ_VECTORS_H +#define _ASM_IRQ_VECTORS_H + +#include <linux/threads.h> + +#define NMI_VECTOR 0x02 + +/* + * IDT vectors usable for external interrupt sources start + * at 0x20: + */ +#define FIRST_EXTERNAL_VECTOR 0x20 + +#ifdef CONFIG_X86_32 +# define SYSCALL_VECTOR 0x80 +#else +# define IA32_SYSCALL_VECTOR 0x80 +#endif + +/* + * Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * cleanup after irq migration on 64 bit. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + +/* + * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit. + * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit. + */ +#ifdef CONFIG_X86_32 +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR) +#else +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#endif +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + * + * Vectors 0xf0-0xfa are free (reserved for future Linux use). + */ +#ifdef CONFIG_X86_32 + +# define SPURIOUS_APIC_VECTOR 0xff +# define ERROR_APIC_VECTOR 0xfe +# define INVALIDATE_TLB_VECTOR 0xfd +# define RESCHEDULE_VECTOR 0xfc +# define CALL_FUNCTION_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xf0 + +#else + +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 +#define INVALIDATE_TLB_VECTOR_END 0xf7 +#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +#define NUM_INVALIDATE_TLB_VECTORS 8 + +#endif + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* + * First APIC vector available to drivers: (vectors 0x30-0xee) we + * start at 0x31(0x41) to spread out vectors evenly between priority + * levels. (0x80 is the syscall vector) + */ +#ifdef CONFIG_X86_32 +# define FIRST_DEVICE_VECTOR 0x31 +#else +# define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) +#endif + +#define NR_VECTORS 256 + +#define FPU_IRQ 13 + +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) + +#if !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) + +# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) + +# define NR_IRQS 224 + +# if (224 >= 32 * NR_CPUS) +# define NR_IRQ_VECTORS NR_IRQS +# else +# define NR_IRQ_VECTORS (32 * NR_CPUS) +# endif + +# else /* IO_APIC || PARAVIRT */ + +# define NR_IRQS 16 +# define NR_IRQ_VECTORS NR_IRQS + +# endif + +#else /* !VISWS && !VOYAGER */ + +# define NR_IRQS 224 +# define NR_IRQ_VECTORS NR_IRQS + +#endif /* VISWS */ + +/* Voyager specific defines */ +/* These define the CPIs we use in linux */ +#define VIC_CPI_LEVEL0 0 +#define VIC_CPI_LEVEL1 1 +/* now the fake CPIs */ +#define VIC_TIMER_CPI 2 +#define VIC_INVALIDATE_CPI 3 +#define VIC_RESCHEDULE_CPI 4 +#define VIC_ENABLE_IRQ_CPI 5 +#define VIC_CALL_FUNCTION_CPI 6 + +/* Now the QIC CPIs: Since we don't need the two initial levels, + * these are 2 less than the VIC CPIs */ +#define QIC_CPI_OFFSET 1 +#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) + +#define VIC_START_FAKE_CPI VIC_TIMER_CPI +#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI + +/* this is the SYS_INT CPI. */ +#define VIC_SYS_INT 8 +#define VIC_CMN_INT 15 + +/* This is the boot CPI for alternate processors. It gets overwritten + * by the above once the system has activated all available processors */ +#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) + + +#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h index c242527f970e..17e7a1701c97 100644 --- a/include/asm-x86/irqflags.h +++ b/include/asm-x86/irqflags.h @@ -111,14 +111,35 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(x) cli #ifdef CONFIG_X86_64 +#define SWAPGS swapgs +/* + * Currently paravirt can't handle swapgs nicely when we + * don't have a stack we can rely on (such as a user space + * stack). So we either find a way around these or just fault + * and emulate if a guest tries to call swapgs directly. + * + * Either way, this is a good way to document that we don't + * have a reliable stack. x86_64 only. + */ +#define SWAPGS_UNSAFE_STACK swapgs + +#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ + #define INTERRUPT_RETURN iretq -#define ENABLE_INTERRUPTS_SYSCALL_RET \ - movq %gs:pda_oldrsp, %rsp; \ - swapgs; \ - sysretq; +#define USERGS_SYSRET64 \ + swapgs; \ + sysretq; +#define USERGS_SYSRET32 \ + swapgs; \ + sysretl +#define ENABLE_INTERRUPTS_SYSEXIT32 \ + swapgs; \ + sti; \ + sysexit + #else #define INTERRUPT_RETURN iret -#define ENABLE_INTERRUPTS_SYSCALL_RET sti; sysexit +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit #define GET_CR0_INTO_EAX movl %cr0, %eax #endif @@ -169,16 +190,6 @@ static inline void trace_hardirqs_fixup(void) #else #ifdef CONFIG_X86_64 -/* - * Currently paravirt can't handle swapgs nicely when we - * don't have a stack we can rely on (such as a user space - * stack). So we either find a way around these or just fault - * and emulate if a guest tries to call swapgs directly. - * - * Either way, this is a good way to document that we don't - * have a reliable stack. x86_64 only. - */ -#define SWAPGS_UNSAFE_STACK swapgs #define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk #define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk #define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index bfd9900742bf..76f392146daa 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ -71,7 +71,8 @@ static inline long kvm_hypercall0(unsigned int nr) long ret; asm volatile(KVM_HYPERCALL : "=a"(ret) - : "a"(nr)); + : "a"(nr) + : "memory"); return ret; } @@ -80,7 +81,8 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) long ret; asm volatile(KVM_HYPERCALL : "=a"(ret) - : "a"(nr), "b"(p1)); + : "a"(nr), "b"(p1) + : "memory"); return ret; } @@ -90,7 +92,8 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, long ret; asm volatile(KVM_HYPERCALL : "=a"(ret) - : "a"(nr), "b"(p1), "c"(p2)); + : "a"(nr), "b"(p1), "c"(p2) + : "memory"); return ret; } @@ -100,7 +103,8 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, long ret; asm volatile(KVM_HYPERCALL : "=a"(ret) - : "a"(nr), "b"(p1), "c"(p2), "d"(p3)); + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); return ret; } @@ -111,7 +115,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, long ret; asm volatile(KVM_HYPERCALL : "=a"(ret) - : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)); + : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4) + : "memory"); return ret; } diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h index 8327907c79bf..017c8c19ad8f 100644 --- a/include/asm-x86/mach-bigsmp/mach_apic.h +++ b/include/asm-x86/mach-bigsmp/mach_apic.h @@ -81,7 +81,7 @@ static inline int multi_timer_check(int apic, int irq) static inline int apicid_to_node(int logical_apicid) { - return (0); + return apicid_2_node[hard_smp_processor_id()]; } static inline int cpu_present_to_apicid(int mps_cpu) diff --git a/include/asm-x86/mach-bigsmp/mach_mpspec.h b/include/asm-x86/mach-bigsmp/mach_mpspec.h deleted file mode 100644 index 6b5dadcf1d0e..000000000000 --- a/include/asm-x86/mach-bigsmp/mach_mpspec.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -#define MAX_MP_BUSSES 32 - -#endif /* __ASM_MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h deleted file mode 100644 index 881c63ca61ad..000000000000 --- a/include/asm-x86/mach-default/irq_vectors.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * This file should contain #defines for all of the interrupt vector - * numbers used by this architecture. - * - * In addition, there are some standard defines: - * - * FIRST_EXTERNAL_VECTOR: - * The first free place for external interrupts - * - * SYSCALL_VECTOR: - * The IRQ vector a syscall makes the user to kernel transition - * under. - * - * TIMER_IRQ: - * The IRQ number the timer interrupt comes in at. - * - * NR_IRQS: - * The total number of interrupt vectors (including all the - * architecture specific interrupts) needed. - * - */ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define RESCHEDULE_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef - -#define TIMER_IRQ 0 - -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ - -/* - * The maximum number of vectors supported by i386 processors - * is limited to 256. For processors other than i386, NR_VECTORS - * should be changed accordingly. - */ -#define NR_VECTORS 256 - -#include "irq_vectors_limits.h" - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - - -#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-default/irq_vectors_limits.h b/include/asm-x86/mach-default/irq_vectors_limits.h deleted file mode 100644 index a90c7a60109f..000000000000 --- a/include/asm-x86/mach-default/irq_vectors_limits.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H - -#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) -#define NR_IRQS 224 -# if (224 >= 32 * NR_CPUS) -# define NR_IRQ_VECTORS NR_IRQS -# else -# define NR_IRQ_VECTORS (32 * NR_CPUS) -# endif -#else -#define NR_IRQS 16 -#define NR_IRQ_VECTORS NR_IRQS -#endif - -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 21003b56ae95..0b2cde5e1b74 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -77,7 +77,11 @@ static inline void setup_apic_routing(void) static inline int apicid_to_node(int logical_apicid) { +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else return 0; +#endif } #endif diff --git a/include/asm-x86/mach-default/setup_arch.h b/include/asm-x86/mach-default/setup_arch.h index 605e3ccb991b..38846208b548 100644 --- a/include/asm-x86/mach-default/setup_arch.h +++ b/include/asm-x86/mach-default/setup_arch.h @@ -1,7 +1,3 @@ /* Hook to call BIOS initialisation function */ /* no action for generic */ - -#ifndef ARCH_SETUP -#define ARCH_SETUP -#endif diff --git a/include/asm-x86/mach-default/smpboot_hooks.h b/include/asm-x86/mach-default/smpboot_hooks.h index 56d0e1fa0258..b63c52182006 100644 --- a/include/asm-x86/mach-default/smpboot_hooks.h +++ b/include/asm-x86/mach-default/smpboot_hooks.h @@ -41,8 +41,10 @@ static inline void __init smpboot_setup_io_apic(void) */ if (!skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); - else + else { nr_ioapics = 0; + localise_nmi_watchdog(); + } } static inline void smpboot_clear_io_apic(void) diff --git a/include/asm-x86/mach-es7000/mach_mpspec.h b/include/asm-x86/mach-es7000/mach_mpspec.h deleted file mode 100644 index b1f5039d4506..000000000000 --- a/include/asm-x86/mach-es7000/mach_mpspec.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -#define MAX_MP_BUSSES 256 - -#endif /* __ASM_MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h index 0d0b5ba2e9d1..586cadbf3787 100644 --- a/include/asm-x86/mach-generic/mach_mpparse.h +++ b/include/asm-x86/mach-generic/mach_mpparse.h @@ -1,7 +1,10 @@ #ifndef _MACH_MPPARSE_H #define _MACH_MPPARSE_H 1 -int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); -int acpi_madt_oem_check(char *oem_id, char *oem_table_id); + +extern int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); + +extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); #endif diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h index 75a56e5afbe7..d802465e026a 100644 --- a/include/asm-x86/mach-numaq/mach_apic.h +++ b/include/asm-x86/mach-numaq/mach_apic.h @@ -20,8 +20,14 @@ static inline cpumask_t target_cpus(void) #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ -#define check_apicid_used(bitmap, apicid) physid_isset(apicid, bitmap) -#define check_apicid_present(bit) physid_isset(bit, phys_cpu_present_map) +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} +static inline unsigned long check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} #define apicid_cluster(apicid) (apicid & 0xF0) static inline int apic_id_registered(void) @@ -77,11 +83,6 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline int generate_logical_apicid(int quad, int phys_apicid) -{ - return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); -} - static inline int apicid_to_node(int logical_apicid) { return logical_apicid >> 4; @@ -95,30 +96,6 @@ static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) return physid_mask_of_physid(cpu + 4*node); } -struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -static inline int mpc_apic_id(struct mpc_config_processor *m, - struct mpc_config_translation *translation_record) -{ - int quad = translation_record->trans_quad; - int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); - - printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver, quad, logical_apicid); - return logical_apicid; -} - extern void *xquad_portio; static inline void setup_portio_remap(void) diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h index 459b12401187..626aef6b155f 100644 --- a/include/asm-x86/mach-numaq/mach_mpparse.h +++ b/include/asm-x86/mach-numaq/mach_mpparse.h @@ -1,14 +1,7 @@ #ifndef __ASM_MACH_MPPARSE_H #define __ASM_MACH_MPPARSE_H -extern void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, - struct mpc_config_translation *translation); -extern void mpc_oem_pci_bus(struct mpc_config_bus *m, - struct mpc_config_translation *translation); - -/* Hook from generic ACPI tables.c */ -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -} +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); #endif /* __ASM_MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-numaq/mach_mpspec.h b/include/asm-x86/mach-numaq/mach_mpspec.h deleted file mode 100644 index dffb09856f8f..000000000000 --- a/include/asm-x86/mach-numaq/mach_mpspec.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 512 - -#define MAX_MP_BUSSES 32 - -#endif /* __ASM_MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-summit/mach_mpspec.h b/include/asm-x86/mach-summit/mach_mpspec.h deleted file mode 100644 index bd765523511a..000000000000 --- a/include/asm-x86/mach-summit/mach_mpspec.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#define MAX_MP_BUSSES 260 - -#endif /* __ASM_MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-visws/irq_vectors.h b/include/asm-x86/mach-visws/irq_vectors.h deleted file mode 100644 index cb572d8db505..000000000000 --- a/include/asm-x86/mach-visws/irq_vectors.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define RESCHEDULE_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef - -#define TIMER_IRQ 0 - -/* - * IRQ definitions - */ -#define NR_VECTORS 256 -#define NR_IRQS 224 -#define NR_IRQ_VECTORS NR_IRQS - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - -#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-visws/setup_arch.h b/include/asm-x86/mach-visws/setup_arch.h index 33f700ef6831..b8f5dd829dba 100644 --- a/include/asm-x86/mach-visws/setup_arch.h +++ b/include/asm-x86/mach-visws/setup_arch.h @@ -4,5 +4,3 @@ extern unsigned long sgivwfb_mem_phys; extern unsigned long sgivwfb_mem_size; /* no action for visws */ - -#define ARCH_SETUP diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h deleted file mode 100644 index 165421f5821c..000000000000 --- a/include/asm-x86/mach-voyager/irq_vectors.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2002 - * - * Author: [email protected] - * - * linux/arch/i386/voyager/irq_vectors.h - * - * This file provides definitions for the VIC and QIC CPIs - */ - -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - -#define NR_VECTORS 256 -#define NR_IRQS 224 -#define NR_IRQ_VECTORS NR_IRQS - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - -#ifndef __ASSEMBLY__ -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h new file mode 100644 index 000000000000..95beda07c6fa --- /dev/null +++ b/include/asm-x86/mmconfig.h @@ -0,0 +1,12 @@ +#ifndef _ASM_MMCONFIG_H +#define _ASM_MMCONFIG_H + +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +static inline void fam10h_check_enable_mmcfg(void) { } +static inline void check_enable_amd_mmconf_dmi(void) { } +#endif + +#endif diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h index 6598450da6c6..fac57014e7c6 100644 --- a/include/asm-x86/mmu_context.h +++ b/include/asm-x86/mmu_context.h @@ -1,5 +1,37 @@ +#ifndef __ASM_X86_MMU_CONTEXT_H +#define __ASM_X86_MMU_CONTEXT_H + +#include <asm/desc.h> +#include <asm/atomic.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/paravirt.h> +#ifndef CONFIG_PARAVIRT +#include <asm-generic/mm_hooks.h> + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* !CONFIG_PARAVIRT */ + +/* + * Used for LDT copy/destruction. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +void destroy_context(struct mm_struct *mm); + #ifdef CONFIG_X86_32 # include "mmu_context_32.h" #else # include "mmu_context_64.h" #endif + +#define activate_mm(prev, next) \ +do { \ + paravirt_activate_mm((prev), (next)); \ + switch_mm((prev), (next), NULL); \ +} while (0); + + +#endif /* __ASM_X86_MMU_CONTEXT_H */ diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h index 9756ae0f1dd3..824fc575c6d8 100644 --- a/include/asm-x86/mmu_context_32.h +++ b/include/asm-x86/mmu_context_32.h @@ -1,28 +1,6 @@ #ifndef __I386_SCHED_H #define __I386_SCHED_H -#include <asm/desc.h> -#include <asm/atomic.h> -#include <asm/pgalloc.h> -#include <asm/tlbflush.h> -#include <asm/paravirt.h> -#ifndef CONFIG_PARAVIRT -#include <asm-generic/mm_hooks.h> - -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) -{ -} -#endif /* !CONFIG_PARAVIRT */ - - -/* - * Used for LDT copy/destruction. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); - - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP @@ -75,10 +53,4 @@ static inline void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk, mm) \ asm("movl %0,%%gs": :"r" (0)); -#define activate_mm(prev, next) \ -do { \ - paravirt_activate_mm((prev), (next)); \ - switch_mm((prev), (next), NULL); \ -} while (0); - #endif diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h index ca44c71e7fb3..c7000634ccae 100644 --- a/include/asm-x86/mmu_context_64.h +++ b/include/asm-x86/mmu_context_64.h @@ -1,21 +1,7 @@ #ifndef __X86_64_MMU_CONTEXT_H #define __X86_64_MMU_CONTEXT_H -#include <asm/desc.h> -#include <asm/atomic.h> -#include <asm/pgalloc.h> #include <asm/pda.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#ifndef CONFIG_PARAVIRT -#include <asm-generic/mm_hooks.h> -#endif - -/* - * possibly do the LDT unload here? - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { @@ -65,8 +51,4 @@ do { \ asm volatile("movl %0,%%fs"::"r"(0)); \ } while (0) -#define activate_mm(prev, next) \ - switch_mm((prev), (next), NULL) - - #endif diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index cb2cad0b65a7..b2298a227567 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -12,11 +12,9 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) -#ifdef CONFIG_X86_NUMAQ - #include <asm/numaq.h> -#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */ - #include <asm/srat.h> -#endif +#include <asm/numaq.h> +/* summit or generic arch */ +#include <asm/srat.h> extern int get_memcfg_numa_flat(void); /* @@ -26,28 +24,20 @@ extern int get_memcfg_numa_flat(void); */ static inline void get_memcfg_numa(void) { -#ifdef CONFIG_X86_NUMAQ + if (get_memcfg_numaq()) return; -#elif defined(CONFIG_ACPI_SRAT) if (get_memcfg_from_srat()) return; -#endif - get_memcfg_numa_flat(); } extern int early_pfn_to_nid(unsigned long pfn); -extern void numa_kva_reserve(void); #else /* !CONFIG_NUMA */ #define get_memcfg_numa get_memcfg_numa_flat -#define get_zholes_size(n) (0) -static inline void numa_kva_reserve(void) -{ -} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM @@ -55,14 +45,14 @@ static inline void numa_kva_reserve(void) /* * generic node memory support, the following assumptions apply: * - * 1) memory comes in 256Mb contigious chunks which are either present or not + * 1) memory comes in 64Mb contigious chunks which are either present or not * 2) we will not have more than 64Gb in total * * for now assume that 64Gb is max amount of RAM for whole system * 64Gb / 4096bytes/page = 16777216 pages */ #define MAX_NR_PAGES 16777216 -#define MAX_ELEMENTS 256 +#define MAX_ELEMENTS 1024 #define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) extern s8 physnode_map[]; @@ -87,9 +77,6 @@ static inline int pfn_to_nid(unsigned long pfn) __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) -#ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */ -#define pfn_valid(pfn) ((pfn) < num_physpages) -#else static inline int pfn_valid(int pfn) { int nid = pfn_to_nid(pfn); @@ -98,7 +85,6 @@ static inline int pfn_valid(int pfn) return (pfn < node_end_pfn(nid)); return 0; } -#endif /* CONFIG_X86_NUMAQ */ #endif /* CONFIG_DISCONTIGMEM */ diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 57a991b9c053..b6995e567fcc 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -13,6 +13,12 @@ extern int apic_version[MAX_APICS]; extern u8 apicid_2_node[]; extern int pic_mode; +#ifdef CONFIG_X86_NUMAQ +extern int mp_bus_id_to_node[MAX_MP_BUSSES]; +extern int mp_bus_id_to_local[MAX_MP_BUSSES]; +extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +#endif + #define MAX_APICID 256 #else @@ -21,26 +27,30 @@ extern int pic_mode; /* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ #define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) +#endif + extern void early_find_smp_config(void); extern void early_get_smp_config(void); -#endif - #if defined(CONFIG_MCA) || defined(CONFIG_EISA) extern int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES]; - extern unsigned int boot_cpu_physical_apicid; +extern unsigned int max_physical_apicid; extern int smp_found_config; extern int mpc_default_type; extern unsigned long mp_lapic_addr; extern void find_smp_config(void); extern void get_smp_config(void); +#ifdef CONFIG_X86_MPPARSE +extern void early_reserve_e820_mpc_new(void); +#else +static inline void early_reserve_e820_mpc_new(void) { } +#endif void __cpuinit generic_processor_info(int apicid, int version); #ifdef CONFIG_ACPI @@ -49,6 +59,17 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +#ifdef CONFIG_X86_IO_APIC +extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity); +#else +static inline int +mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity) +{ + return 0; +} +#endif #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) @@ -101,6 +122,7 @@ typedef struct physid_mask physid_mask_t; __physid_mask; \ }) +/* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ ({ \ physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ @@ -108,6 +130,12 @@ typedef struct physid_mask physid_mask_t; __physid_mask; \ }) +static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +{ + physids_clear(*map); + physid_set(physid, *map); +} + #define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } #define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h index dc6ef85e3624..38d1e73b49e4 100644 --- a/include/asm-x86/mpspec_def.h +++ b/include/asm-x86/mpspec_def.h @@ -17,10 +17,11 @@ # define MAX_MPC_ENTRY 1024 # define MAX_APICS 256 #else -/* - * A maximum of 255 APICs with the current APIC ID architecture. - */ -# define MAX_APICS 255 +# if NR_CPUS <= 255 +# define MAX_APICS 255 +# else +# define MAX_APICS 32768 +# endif #endif struct intel_mp_floating { diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index 09413ad39d3c..44bce773012e 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -111,7 +111,9 @@ #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_K8_SYSCFG 0xc0010010 #define MSR_K8_HWCR 0xc0010015 -#define MSR_K8_ENABLE_C1E 0xc0010055 +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +/* C1E active bits in int pending message */ +#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 #define MSR_K8_TSEG_ADDR 0xc0010112 #define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ #define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index 3707650a169b..ca110ee73f07 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -18,7 +18,7 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) unsigned long low, high; asm volatile(".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (*aux)); - return low | ((u64)high >> 32); + return low | ((u64)high << 32); } /* @@ -66,7 +66,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { - asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high)); + asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); } static inline int native_write_msr_safe(unsigned int msr, @@ -81,7 +81,8 @@ static inline int native_write_msr_safe(unsigned int msr, _ASM_EXTABLE(2b, 3b) : "=a" (err) : "c" (msr), "0" (low), "d" (high), - "i" (-EFAULT)); + "i" (-EFAULT) + : "memory"); return err; } diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 1e363021e72f..21f8d0202a82 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -15,38 +15,13 @@ */ int do_nmi_callback(struct pt_regs *regs, int cpu); -#ifdef CONFIG_PM - -/** Replace the PM callback routine for NMI. */ -struct pm_dev *set_nmi_pm_callback(pm_callback callback); - -/** Unset the PM callback routine back to the default. */ -void unset_nmi_pm_callback(struct pm_dev *dev); - -#else - -static inline struct pm_dev *set_nmi_pm_callback(pm_callback callback) -{ - return 0; -} - -static inline void unset_nmi_pm_callback(struct pm_dev *dev) -{ -} - -#endif /* CONFIG_PM */ - #ifdef CONFIG_X86_64 extern void default_do_nmi(struct pt_regs *); -extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); -extern void nmi_watchdog_default(void); -#else -#define nmi_watchdog_default() do {} while (0) #endif +extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -extern int unknown_nmi_panic; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); @@ -62,12 +37,10 @@ extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; -#define NMI_DISABLED -1 #define NMI_NONE 0 #define NMI_IO_APIC 1 #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 -#define NMI_DEFAULT NMI_DISABLED struct ctl_table; struct file; @@ -78,6 +51,24 @@ extern int unknown_nmi_panic; void __trigger_all_cpu_backtrace(void); #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +static inline void localise_nmi_watchdog(void) +{ + if (nmi_watchdog == NMI_IO_APIC) + nmi_watchdog = NMI_LOCAL_APIC; +} + +/* check if nmi_watchdog is active (ie was specified at boot) */ +static inline int nmi_watchdog_active(void) +{ + /* + * actually it should be: + * return (nmi_watchdog == NMI_LOCAL_APIC || + * nmi_watchdog == NMI_IO_APIC) + * but since they are power of two we could use a + * cheaper way --cvg + */ + return nmi_watchdog & 0x3; +} #endif void lapic_watchdog_stop(void); diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h index 03d0f7a9bf02..220d7b7707a0 100644 --- a/include/asm-x86/numa_32.h +++ b/include/asm-x86/numa_32.h @@ -2,14 +2,10 @@ #define _ASM_X86_32_NUMA_H 1 extern int pxm_to_nid(int pxm); +extern void numa_remove_cpu(int cpu); #ifdef CONFIG_NUMA -extern void __init remap_numa_kva(void); -extern void set_highmem_pages_init(int); -#else -static inline void remap_numa_kva(void) -{ -} +extern void set_highmem_pages_init(void); #endif #endif /* _ASM_X86_32_NUMA_H */ diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index 22e87c9f6a80..3830094434a9 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -14,32 +14,30 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) -extern void numa_add_cpu(int cpu); extern void numa_init_array(void); extern int numa_off; -extern void numa_set_node(int cpu, int node); extern void srat_reserve_add_area(int nodeid); extern int hotadd_percent; extern s16 apicid_to_node[MAX_LOCAL_APIC]; -extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); extern unsigned long numa_free_all_bootmem(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); #ifdef CONFIG_NUMA extern void __init init_cpu_to_node(void); - -static inline void clear_node_cpumask(int cpu) -{ - clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); -} - +extern void __cpuinit numa_set_node(int cpu, int node); +extern void __cpuinit numa_clear_node(int cpu); +extern void __cpuinit numa_add_cpu(int cpu); +extern void __cpuinit numa_remove_cpu(int cpu); #else -#define init_cpu_to_node() do {} while (0) -#define clear_node_cpumask(cpu) do {} while (0) +static inline void init_cpu_to_node(void) { } +static inline void numa_set_node(int cpu, int node) { } +static inline void numa_clear_node(int cpu) { } +static inline void numa_add_cpu(int cpu, int node) { } +static inline void numa_remove_cpu(int cpu) { } #endif #endif diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h index 94b86c31239a..ef068d2465d6 100644 --- a/include/asm-x86/numaq.h +++ b/include/asm-x86/numaq.h @@ -28,6 +28,7 @@ #ifdef CONFIG_X86_NUMAQ +extern int found_numaq; extern int get_memcfg_numaq(void); /* @@ -156,9 +157,10 @@ struct sys_cfg_data { struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */ }; -static inline unsigned long *get_zholes_size(int nid) +#else +static inline int get_memcfg_numaq(void) { - return NULL; + return 0; } #endif /* CONFIG_X86_NUMAQ */ #endif /* NUMAQ_H */ diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index dc936dddf161..b52ed85f32f5 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -51,8 +51,15 @@ #ifndef __ASSEMBLY__ +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pgprotval_t pgprot; } pgprot_t; + extern int page_is_ram(unsigned long pagenr); extern int devmem_is_allowed(unsigned long pagenr); +extern void map_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); +extern void unmap_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); extern unsigned long max_pfn_mapped; @@ -74,9 +81,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pgprotval_t pgprot; } pgprot_t; - static inline pgd_t native_make_pgd(pgdval_t val) { return (pgd_t) { val }; @@ -160,6 +164,7 @@ static inline pteval_t native_pte_val(pte_t pte) #endif #define pte_val(x) native_pte_val(x) +#define pte_flags(x) native_pte_val(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index ccf0ba3c3aba..ab8528793f08 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -13,6 +13,14 @@ */ #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#ifdef CONFIG_4KSTACKS +#define THREAD_ORDER 0 +#else +#define THREAD_ORDER 1 +#endif +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) + + #ifdef CONFIG_X86_PAE /* 44=32+12, the limit we can fit into an unsigned long pfn */ #define __PHYSICAL_MASK_SHIFT 44 @@ -84,6 +92,13 @@ extern int sysctl_legacy_va_layout; #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) #define MAXMEM (-__PAGE_OFFSET - __VMALLOC_RESERVE) +extern void find_low_pfn_range(void); +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); +extern void initmem_init(unsigned long, unsigned long); +extern void setup_bootmem_allocator(void); + + #ifdef CONFIG_X86_USE_3DNOW #include <asm/mmx.h> diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index 6ea72859c491..c6916c83e6b1 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -26,7 +26,13 @@ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) -#define __PAGE_OFFSET _AC(0xffff810000000000, UL) +/* + * Set __PAGE_OFFSET to the most negative possible address + + * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a + * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's + * what Xen requires. + */ +#define __PAGE_OFFSET _AC(0xffff880000000000, UL) #define __PHYSICAL_START CONFIG_PHYSICAL_START #define __KERNEL_ALIGN 0x200000 @@ -58,7 +64,8 @@ void clear_page(void *page); void copy_page(void *to, void *from); -extern unsigned long end_pfn; +/* duplicated to the one in bootmem.h */ +extern unsigned long max_pfn; extern unsigned long phys_base; extern unsigned long __phys_addr(unsigned long); @@ -83,10 +90,15 @@ typedef struct { pteval_t pte; } pte_t; extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); + +extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); +extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < end_pfn) +#define pfn_valid(pfn) ((pfn) < max_pfn) #endif diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 0f13b945e240..ef5e8ec6a6ab 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -84,7 +84,7 @@ struct pv_time_ops { int (*set_wallclock)(unsigned long); unsigned long long (*sched_clock)(void); - unsigned long (*get_cpu_khz)(void); + unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { @@ -115,6 +115,9 @@ struct pv_cpu_ops { void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); +#ifdef CONFIG_X86_64 + void (*load_gs_index)(unsigned int idx); +#endif void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, const void *desc); void (*write_gdt_entry)(struct desc_struct *, @@ -141,8 +144,32 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); - /* These two are jmp to, not actually called. */ - void (*irq_enable_syscall_ret)(void); + /* + * Atomically enable interrupts and return to userspace. This + * is only ever used to return to 32-bit processes; in a + * 64-bit kernel, it's used for 32-on-64 compat processes, but + * never native 64-bit processes. (Jump, not call.) + */ + void (*irq_enable_sysexit)(void); + + /* + * Switch to usermode gs and return to 64-bit usermode using + * sysret. Only used in 64-bit kernels to return to 64-bit + * processes. Usermode register state, including %rsp, must + * already be restored. + */ + void (*usergs_sysret64)(void); + + /* + * Switch to usermode gs and return to 32-bit usermode using + * sysret. Used to return to 32-on-64 compat processes. + * Other usermode register state, including %esp, must already + * be restored. + */ + void (*usergs_sysret32)(void); + + /* Normal iret. Jump to this with the standard iret stack + frame set up. */ void (*iret)(void); void (*swapgs)(void); @@ -165,6 +192,10 @@ struct pv_irq_ops { void (*irq_enable)(void); void (*safe_halt)(void); void (*halt)(void); + +#ifdef CONFIG_X86_64 + void (*adjust_exception_frame)(void); +#endif }; struct pv_apic_ops { @@ -219,7 +250,14 @@ struct pv_mmu_ops { void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, unsigned long va); - /* Hooks for allocating/releasing pagetable pages */ + /* Hooks for allocating and freeing a pagetable top-level */ + int (*pgd_alloc)(struct mm_struct *mm); + void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); + + /* + * Hooks for allocating/releasing pagetable pages when they're + * attached to a pagetable + */ void (*alloc_pte)(struct mm_struct *mm, u32 pfn); void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); @@ -238,7 +276,13 @@ struct pv_mmu_ops { void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + pteval_t (*pte_val)(pte_t); + pteval_t (*pte_flags)(pte_t); pte_t (*make_pte)(pteval_t pte); pgdval_t (*pgd_val)(pgd_t); @@ -273,6 +317,13 @@ struct pv_mmu_ops { #endif struct pv_lazy_ops lazy_mode; + + /* dom0 ops */ + + /* Sometimes the physical address is a pfn, and sometimes its + an mfn. We can tell which is which from the index. */ + void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, + unsigned long phys, pgprot_t flags); }; /* This contains all the paravirt structures: we get a convenient @@ -439,10 +490,17 @@ int paravirt_disable_iospace(void); #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" #endif +#ifdef CONFIG_PARAVIRT_DEBUG +#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) +#else +#define PVOP_TEST_NULL(op) ((void)op) +#endif + #define __PVOP_CALL(rettype, op, pre, post, ...) \ ({ \ rettype __ret; \ PVOP_CALL_ARGS; \ + PVOP_TEST_NULL(op); \ /* This is 32-bit specific, but is okay in 64-bit */ \ /* since this condition will never hold */ \ if (sizeof(rettype) > sizeof(unsigned long)) { \ @@ -471,6 +529,7 @@ int paravirt_disable_iospace(void); #define __PVOP_VCALL(op, pre, post, ...) \ ({ \ PVOP_VCALL_ARGS; \ + PVOP_TEST_NULL(op); \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ @@ -720,7 +779,7 @@ static inline unsigned long long paravirt_sched_clock(void) { return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) +#define calibrate_tsc() (pv_time_ops.get_tsc_khz()) static inline unsigned long long paravirt_read_pmc(int counter) { @@ -789,6 +848,13 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu) PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); } +#ifdef CONFIG_X86_64 +static inline void load_gs_index(unsigned int gs) +{ + PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs); +} +#endif + static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { @@ -912,6 +978,16 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); } +static inline int paravirt_pgd_alloc(struct mm_struct *mm) +{ + return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); +} + +static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); +} + static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) { PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); @@ -996,6 +1072,20 @@ static inline pteval_t pte_val(pte_t pte) return ret; } +static inline pteval_t pte_flags(pte_t pte) +{ + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, + pte.pte, (u64)pte.pte >> 32); + else + ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, + pte.pte); + + return ret; +} + static inline pgd_t __pgd(pgdval_t val) { pgdval_t ret; @@ -1024,6 +1114,29 @@ static inline pgdval_t pgd_val(pgd_t pgd) return ret; } +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pteval_t ret; + + ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, + mm, addr, ptep); + + return (pte_t) { .pte = ret }; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (sizeof(pteval_t) > sizeof(long)) + /* 5 arg words */ + pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); + else + PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, + mm, addr, ptep, pte.pte); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { if (sizeof(pteval_t) > sizeof(long)) @@ -1252,6 +1365,12 @@ static inline void arch_flush_lazy_mmu_mode(void) } } +static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, + unsigned long phys, pgprot_t flags) +{ + pv_mmu_ops.set_fixmap(idx, phys, flags); +} + void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) @@ -1374,54 +1493,86 @@ static inline unsigned long __raw_local_irq_save(void) #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) +#define PARA_INDIRECT(addr) *addr(%rip) #else #define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) +#define PARA_INDIRECT(addr) *%cs:addr #endif #define INTERRUPT_RETURN \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - jmp *%cs:pv_cpu_ops+PV_CPU_iret) + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS; \ - call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \ + PV_SAVE_REGS; \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS;) \ #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS; \ - call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \ + PV_SAVE_REGS; \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS;) -#define ENABLE_INTERRUPTS_SYSCALL_RET \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\ +#define USERGS_SYSRET32 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ CLBR_NONE, \ - jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret) - + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32)) #ifdef CONFIG_X86_32 -#define GET_CR0_INTO_EAX \ - push %ecx; push %edx; \ - call *pv_cpu_ops+PV_CPU_read_cr0; \ +#define GET_CR0_INTO_EAX \ + push %ecx; push %edx; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx -#else + +#define ENABLE_INTERRUPTS_SYSEXIT \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) + + +#else /* !CONFIG_X86_32 */ + +/* + * If swapgs is used while the userspace stack is still current, + * there's no way to call a pvop. The PV replacement *must* be + * inlined, or the swapgs instruction must be trapped and emulated. + */ +#define SWAPGS_UNSAFE_STACK \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ + swapgs) + #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ PV_SAVE_REGS; \ - call *pv_cpu_ops+PV_CPU_swapgs; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ PV_RESTORE_REGS \ ) -#define GET_CR2_INTO_RCX \ - call *pv_mmu_ops+PV_MMU_read_cr2; \ - movq %rax, %rcx; \ +#define GET_CR2_INTO_RCX \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \ + movq %rax, %rcx; \ xorq %rax, %rax; -#endif +#define PARAVIRT_ADJUST_EXCEPTION_FRAME \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ + CLBR_NONE, \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame)) + +#define USERGS_SYSRET64 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + +#define ENABLE_INTERRUPTS_SYSEXIT32 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ + CLBR_NONE, \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) +#endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h index 88f60cc6a227..7edc47307217 100644 --- a/include/asm-x86/pat.h +++ b/include/asm-x86/pat.h @@ -1,14 +1,13 @@ - #ifndef _ASM_PAT_H -#define _ASM_PAT_H 1 +#define _ASM_PAT_H #include <linux/types.h> #ifdef CONFIG_X86_PAT -extern int pat_wc_enabled; +extern int pat_enabled; extern void validate_pat_support(struct cpuinfo_x86 *c); #else -static const int pat_wc_enabled = 0; +static const int pat_enabled; static inline void validate_pat_support(struct cpuinfo_x86 *c) { } #endif @@ -21,4 +20,3 @@ extern int free_memtype(u64 start, u64 end); extern void pat_disable(char *reason); #endif - diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 30bbde0cb34b..2db14cf17db8 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -18,6 +18,8 @@ struct pci_sysdata { #endif }; +extern int pci_routeirq; + /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node); diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h index 8c4c3a0368e2..a50d46851285 100644 --- a/include/asm-x86/pci_32.h +++ b/include/asm-x86/pci_32.h @@ -18,12 +18,14 @@ struct pci_dev; #define PCI_DMA_BUS_IS_PHYS (1) /* pci_unmap_{page,single} is a nop so... */ -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0]; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0]; +#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { break; } while (pci_unmap_addr(PTR, ADDR_NAME)) +#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { break; } while (pci_unmap_len(PTR, LEN_NAME)) #endif /* __KERNEL__ */ diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 101fb9e11954..b34e9a7cc80b 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -22,6 +22,8 @@ struct x8664_pda { offset 40!!! */ #endif char *irqstackptr; + short nodenumber; /* number of current node (32k max) */ + short in_bootmem; /* pda lives in bootmem */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; @@ -37,8 +39,7 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda *_cpu_pda[]; -extern struct x8664_pda boot_cpu_pda[]; +extern struct x8664_pda **_cpu_pda; extern void pda_init(int); #define cpu_pda(i) (_cpu_pda[i]) diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h index 736fc3bb8e1e..912a3a17b9db 100644 --- a/include/asm-x86/percpu.h +++ b/include/asm-x86/percpu.h @@ -143,4 +143,50 @@ do { \ #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) #endif /* !__ASSEMBLY__ */ #endif /* !CONFIG_X86_64 */ + +#ifdef CONFIG_SMP + +/* + * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu + * variables that are initialized and accessed before there are per_cpu + * areas allocated. + */ + +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue; \ + __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ + { [0 ... NR_CPUS-1] = _initvalue }; \ + __typeof__(_type) *_name##_early_ptr = _name##_early_map + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ + extern __typeof__(_type) _name##_early_map[] + +#define early_per_cpu_ptr(_name) (_name##_early_ptr) +#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) +#define early_per_cpu(_name, _cpu) \ + (early_per_cpu_ptr(_name) ? \ + early_per_cpu_ptr(_name)[_cpu] : \ + per_cpu(_name, _cpu)) + +#else /* !CONFIG_SMP */ +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name) + +#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) +#define early_per_cpu_ptr(_name) NULL +/* no early_per_cpu_map() */ + +#endif /* !CONFIG_SMP */ + #endif /* _ASM_X86_PERCPU_H_ */ diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h index 91e4641f3f31..d63ea431cb3b 100644 --- a/include/asm-x86/pgalloc.h +++ b/include/asm-x86/pgalloc.h @@ -5,9 +5,13 @@ #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> +static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else +#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) +static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {} static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 97c271b2910b..49cbd76b9547 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -20,30 +20,25 @@ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ -/* - * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a - * sign-extended value on 32-bit with all 1's in the upper word, - * which preserves the upper pte values on 64-bit ptes: - */ -#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) -#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW) -#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER) -#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT) -#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD) -#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED) -#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY) -#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */ -#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */ -#define _PAGE_UNUSED1 (_AC(1, L)<<_PAGE_BIT_UNUSED1) -#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2) -#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3) -#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT) -#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE) +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2) +#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX) +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else -#define _PAGE_NX 0 +#define _PAGE_NX (_AT(pteval_t, 0)) #endif /* If _PAGE_PRESENT is clear, we use these: */ @@ -83,19 +78,9 @@ #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ _PAGE_ACCESSED) -#ifdef CONFIG_X86_32 -#define _PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX) - -#ifndef __ASSEMBLY__ -extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; -#endif /* __ASSEMBLY__ */ -#else #define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) -#endif #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) @@ -106,26 +91,22 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) +#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#ifdef CONFIG_X86_32 -# define MAKE_GLOBAL(x) __pgprot((x)) -#else -# define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) -#endif - -#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) -#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) -#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX) -#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC) -#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) -#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS) -#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE) -#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE) -#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC) -#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL) -#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE) +#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) +#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) +#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) +#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) +#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) +#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) +#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) +#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) +#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) +#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) /* xwr */ #define __P000 PAGE_NONE @@ -164,37 +145,37 @@ extern struct list_head pgd_list; */ static inline int pte_dirty(pte_t pte) { - return pte_val(pte) & _PAGE_DIRTY; + return pte_flags(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { - return pte_val(pte) & _PAGE_ACCESSED; + return pte_flags(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { - return pte_val(pte) & _PAGE_RW; + return pte_flags(pte) & _PAGE_RW; } static inline int pte_file(pte_t pte) { - return pte_val(pte) & _PAGE_FILE; + return pte_flags(pte) & _PAGE_FILE; } static inline int pte_huge(pte_t pte) { - return pte_val(pte) & _PAGE_PSE; + return pte_flags(pte) & _PAGE_PSE; } static inline int pte_global(pte_t pte) { - return pte_val(pte) & _PAGE_GLOBAL; + return pte_flags(pte) & _PAGE_GLOBAL; } static inline int pte_exec(pte_t pte) { - return !(pte_val(pte) & _PAGE_NX); + return !(pte_flags(pte) & _PAGE_NX); } static inline int pte_special(pte_t pte) @@ -210,22 +191,22 @@ static inline int pmd_large(pmd_t pte) static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); + return __pte(pte_val(pte) & ~_PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); + return __pte(pte_val(pte) & ~_PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); + return __pte(pte_val(pte) & ~_PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) @@ -250,7 +231,7 @@ static inline pte_t pte_mkhuge(pte_t pte) static inline pte_t pte_clrhuge(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); + return __pte(pte_val(pte) & ~_PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) @@ -260,7 +241,7 @@ static inline pte_t pte_mkglobal(pte_t pte) static inline pte_t pte_clrglobal(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); + return __pte(pte_val(pte) & ~_PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) @@ -305,7 +286,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) return __pgprot(preservebits | addbits); } -#define pte_pgprot(x) __pgprot(pte_val(x) & ~PTE_MASK) +#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) @@ -318,6 +299,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot); #endif +/* Install a pte for a particular vaddr in kernel space. */ +void set_pte_vaddr(unsigned long vaddr, pte_t pte); + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT */ @@ -359,6 +343,26 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, # include "pgtable_64.h" #endif +/* + * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] + * + * this macro returns the index of the entry in the pgd page which would + * control the given virtual address + */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) + +/* + * pgd_offset() returns a (pgd_t *) + * pgd_index() is used get the offset into the pgd page's array of pgd_t's; + */ +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +/* + * a shortcut which implies the use of the kernel's pgd, instead + * of a process's + */ +#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) + + #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) @@ -369,8 +373,15 @@ enum { PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G, + PG_LEVEL_NUM }; +#ifdef CONFIG_PROC_FS +extern void update_page_count(int level, unsigned long pages); +#else +static inline void update_page_count(int level, unsigned long pages) { } +#endif + /* * Helper function that returns the kernel pagetable entry controlling * the virtual address 'address'. NULL means no pagetable entry present. @@ -420,6 +431,8 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, * race with other CPU's that might be updating the dirty * bit at the same time. */ +struct vm_area_struct; + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 32ca03109a4c..ec871c420d7e 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -113,26 +113,6 @@ extern unsigned long pg0[]; */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -/* - * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] - * - * this macro returns the index of the entry in the pgd page which would - * control the given virtual address - */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) -#define pgd_index_k(addr) pgd_index((addr)) - -/* - * pgd_offset() returns a (pgd_t *) - * pgd_index() is used get the offset into the pgd page's array of pgd_t's; - */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) - -/* - * a shortcut which implies the use of the kernel's pgd, instead - * of a process's - */ -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) static inline int pud_large(pud_t pud) { return 0; } diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 1cc50d22d735..fa7208b483ca 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -70,6 +70,9 @@ extern void paging_init(void); struct mm_struct; +void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); + + static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -190,12 +193,9 @@ static inline int pmd_bad(pmd_t pmd) #define pgd_page_vaddr(pgd) \ ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK)) #define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) -#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address))) #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) static inline int pgd_large(pgd_t pgd) { return 0; } -#define mk_kernel_pgd(address) ((pgd_t){ (address) | _KERNPG_TABLE }) +#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) /* PUD - Level3 access */ /* to find an entry in a page-table-directory. */ diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 199cab107d85..092b39b3a7e6 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -88,4 +88,10 @@ #define CX86_ARR_BASE 0xc4 #define CX86_RCR_BASE 0xdc +#ifdef CONFIG_VM86 +#define X86_VM_MASK X86_EFLAGS_VM +#else +#define X86_VM_MASK 0 /* No VM86 support */ +#endif + #endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 559105220a47..7f7382704592 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -153,7 +153,7 @@ static inline int hlt_works(int cpu) extern void cpu_detect(struct cpuinfo_x86 *c); -extern void identify_cpu(struct cpuinfo_x86 *); +extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); @@ -263,15 +263,11 @@ struct tss_struct { struct thread_struct *io_bitmap_owner; /* - * Pad the TSS to be cacheline-aligned (size is 0x100): - */ - unsigned long __cacheline_filler[35]; - /* * .. and then another 0x100 bytes for the emergency kernel stack: */ unsigned long stack[64]; -} __attribute__((packed)); +} ____cacheline_aligned; DECLARE_PER_CPU(struct tss_struct, init_tss); @@ -535,7 +531,6 @@ static inline void load_sp0(struct tss_struct *tss, } #define set_iopl_mask native_set_iopl_mask -#define SWAPGS swapgs #endif /* CONFIG_PARAVIRT */ /* diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h index 6c8b41b03f6d..3dd458c385c0 100644 --- a/include/asm-x86/proto.h +++ b/include/asm-x86/proto.h @@ -14,8 +14,6 @@ extern void ia32_syscall(void); extern void ia32_cstar_target(void); extern void ia32_sysenter_target(void); -extern void reserve_bootmem_generic(unsigned long phys, unsigned len); - extern void syscall32_cpu_init(void); extern void check_efer(void); diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 9f922b0b95d6..8a71db803da6 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -3,7 +3,12 @@ #include <linux/compiler.h> /* For __user */ #include <asm/ptrace-abi.h> +#include <asm/processor-flags.h> +#ifdef __KERNEL__ +#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */ +#include <asm/segment.h> +#endif #ifndef __ASSEMBLY__ @@ -55,9 +60,6 @@ struct pt_regs { unsigned long ss; }; -#include <asm/vm86.h> -#include <asm/segment.h> - #endif /* __KERNEL__ */ #else /* __i386__ */ diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h index e63741f19392..206f355786dc 100644 --- a/include/asm-x86/reboot.h +++ b/include/asm-x86/reboot.h @@ -14,8 +14,8 @@ struct machine_ops { extern struct machine_ops machine_ops; -void machine_real_restart(unsigned char *code, int length); void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); +void machine_real_restart(const unsigned char *code, int length); #endif /* _ASM_REBOOT_H */ diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index 7400d3ad75c6..adec887dd7cd 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -19,9 +19,13 @@ #if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) # define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) -# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) #else # define NEED_PAE 0 +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) +#else # define NEED_CX8 0 #endif @@ -38,7 +42,7 @@ #endif #ifdef CONFIG_X86_64 -#define NEED_PSE (1<<(X86_FEATURE_PSE & 31)) +#define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h index 2557514d7ef6..8d9f0b41ee86 100644 --- a/include/asm-x86/resume-trace.h +++ b/include/asm-x86/resume-trace.h @@ -6,7 +6,7 @@ #define TRACE_RESUME(user) \ do { \ if (pm_trace_enabled) { \ - void *tracedata; \ + const void *tracedata; \ asm volatile(_ASM_MOV_UL " $1f,%0\n" \ ".section .tracedata,\"a\"\n" \ "1:\t.word %c1\n\t" \ diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h index 18da19e89bff..36e71c5f306f 100644 --- a/include/asm-x86/seccomp_32.h +++ b/include/asm-x86/seccomp_32.h @@ -1,4 +1,5 @@ #ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H #include <linux/thread_info.h> diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h index 553af65a2287..76cfe69aa63c 100644 --- a/include/asm-x86/seccomp_64.h +++ b/include/asm-x86/seccomp_64.h @@ -1,4 +1,5 @@ #ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H #include <linux/thread_info.h> diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h index ed5131dd7d92..dfc8601c0892 100644 --- a/include/asm-x86/segment.h +++ b/include/asm-x86/segment.h @@ -61,18 +61,14 @@ #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) #define GDT_ENTRY_DEFAULT_USER_CS 14 -#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) #define GDT_ENTRY_DEFAULT_USER_DS 15 -#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) #define GDT_ENTRY_KERNEL_BASE 12 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) @@ -139,10 +135,11 @@ #else #include <asm/cache.h> -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +#define GDT_ENTRY_KERNEL32_CS 1 +#define GDT_ENTRY_KERNEL_CS 2 +#define GDT_ENTRY_KERNEL_DS 3 -#define __KERNEL32_CS 0x08 +#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) /* * we cannot use the same code segment descriptor for user and kernel @@ -150,10 +147,10 @@ * The segment offset needs to contain a RPL. Grr. -AK * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) */ - -#define __USER32_CS 0x23 /* 4*8+3 */ -#define __USER_DS 0x2b /* 5*8+3 */ -#define __USER_CS 0x33 /* 6*8+3 */ +#define GDT_ENTRY_DEFAULT_USER32_CS 4 +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define GDT_ENTRY_DEFAULT_USER_CS 6 +#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3) #define __USER32_DS __USER_DS #define GDT_ENTRY_TSS 8 /* needs two entries */ @@ -175,6 +172,10 @@ #endif +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3) #ifndef CONFIG_PARAVIRT #define get_kernel_rpl() 0 #endif diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index fa6763af8d26..1d121c632d9e 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -8,7 +8,6 @@ /* Interrupt control for vSMPowered x86_64 systems */ void vsmp_init(void); -char *machine_specific_memory_setup(void); #ifndef CONFIG_PARAVIRT #define paravirt_post_allocator_init() do {} while (0) #endif @@ -43,26 +42,23 @@ char *machine_specific_memory_setup(void); */ extern struct boot_params boot_params; -#ifdef __i386__ /* * Do NOT EVER look at the BIOS memory size location. * It does not work on many machines. */ #define LOWMEMSIZE() (0x9f000) -struct e820entry; - -char * __init machine_specific_memory_setup(void); -char *memory_setup(void); +#ifdef __i386__ -int __init copy_e820_map(struct e820entry *biosmap, int nr_map); -int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type); +void __init i386_start_kernel(void); +extern void probe_roms(void); +extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; - +#else +void __init x86_64_start_kernel(char *real_mode); +void __init x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 1ebaa5cd3112..2e221f1ce0b2 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -29,21 +29,12 @@ extern int smp_num_siblings; extern unsigned int num_processors; extern cpumask_t cpu_initialized; -#ifdef CONFIG_SMP -extern u16 x86_cpu_to_apicid_init[]; -extern u16 x86_bios_cpu_apicid_init[]; -extern void *x86_cpu_to_apicid_early_ptr; -extern void *x86_bios_cpu_apicid_early_ptr; -#else -#define x86_cpu_to_apicid_early_ptr NULL -#define x86_bios_cpu_apicid_early_ptr NULL -#endif - DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -DECLARE_PER_CPU(u16, x86_cpu_to_apicid); -DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); + +DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); /* Static state in head.S used to set up a CPU */ extern struct { @@ -118,8 +109,6 @@ int native_cpu_up(unsigned int cpunum); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); -extern void prefill_possible_map(void); - void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) @@ -130,6 +119,14 @@ static inline int num_booting_cpus(void) } #endif /* CONFIG_SMP */ +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_CPU) +extern void prefill_possible_map(void); +#else +static inline void prefill_possible_map(void) +{ +} +#endif + extern unsigned disabled_cpus __cpuinitdata; #ifdef CONFIG_X86_32_SMP @@ -197,11 +194,9 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_HOTPLUG_CPU -extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif -extern void smp_alloc_memory(void); extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); #endif /* __ASSEMBLY__ */ diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h index f4bba131d068..774c919dc232 100644 --- a/include/asm-x86/srat.h +++ b/include/asm-x86/srat.h @@ -27,11 +27,13 @@ #ifndef _ASM_SRAT_H_ #define _ASM_SRAT_H_ -#ifndef CONFIG_ACPI_SRAT -#error CONFIG_ACPI_SRAT not defined, and srat.h header has been included -#endif - +#ifdef CONFIG_ACPI_NUMA extern int get_memcfg_from_srat(void); -extern unsigned long *get_zholes_size(int); +#else +static inline int get_memcfg_from_srat(void) +{ + return 0; +} +#endif #endif /* _ASM_SRAT_H_ */ diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h index b49369ad9a61..193578cd1fd9 100644 --- a/include/asm-x86/string_32.h +++ b/include/asm-x86/string_32.h @@ -29,81 +29,116 @@ extern char *strchr(const char *s, int c); #define __HAVE_ARCH_STRLEN extern size_t strlen(const char *s); -static __always_inline void * __memcpy(void * to, const void * from, size_t n) +static __always_inline void *__memcpy(void *to, const void *from, size_t n) { -int d0, d1, d2; -__asm__ __volatile__( - "rep ; movsl\n\t" - "movl %4,%%ecx\n\t" - "andl $3,%%ecx\n\t" - "jz 1f\n\t" - "rep ; movsb\n\t" - "1:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) - : "memory"); -return (to); + int d0, d1, d2; + asm volatile("rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "andl $3,%%ecx\n\t" + "jz 1f\n\t" + "rep ; movsb\n\t" + "1:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) + : "memory"); + return to; } /* * This looks ugly, but the compiler can optimize it totally, * as the count is constant. */ -static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n) +static __always_inline void *__constant_memcpy(void *to, const void *from, + size_t n) { long esi, edi; - if (!n) return to; -#if 1 /* want to do small copies with non-string ops? */ + if (!n) + return to; + switch (n) { - case 1: *(char*)to = *(char*)from; return to; - case 2: *(short*)to = *(short*)from; return to; - case 4: *(int*)to = *(int*)from; return to; -#if 1 /* including those doable with two moves? */ - case 3: *(short*)to = *(short*)from; - *((char*)to+2) = *((char*)from+2); return to; - case 5: *(int*)to = *(int*)from; - *((char*)to+4) = *((char*)from+4); return to; - case 6: *(int*)to = *(int*)from; - *((short*)to+2) = *((short*)from+2); return to; - case 8: *(int*)to = *(int*)from; - *((int*)to+1) = *((int*)from+1); return to; -#endif + case 1: + *(char *)to = *(char *)from; + return to; + case 2: + *(short *)to = *(short *)from; + return to; + case 4: + *(int *)to = *(int *)from; + return to; + + case 3: + *(short *)to = *(short *)from; + *((char *)to + 2) = *((char *)from + 2); + return to; + case 5: + *(int *)to = *(int *)from; + *((char *)to + 4) = *((char *)from + 4); + return to; + case 6: + *(int *)to = *(int *)from; + *((short *)to + 2) = *((short *)from + 2); + return to; + case 8: + *(int *)to = *(int *)from; + *((int *)to + 1) = *((int *)from + 1); + return to; } -#endif - esi = (long) from; - edi = (long) to; - if (n >= 5*4) { + + esi = (long)from; + edi = (long)to; + if (n >= 5 * 4) { /* large block: use rep prefix */ int ecx; - __asm__ __volatile__( - "rep ; movsl" - : "=&c" (ecx), "=&D" (edi), "=&S" (esi) - : "0" (n/4), "1" (edi),"2" (esi) - : "memory" + asm volatile("rep ; movsl" + : "=&c" (ecx), "=&D" (edi), "=&S" (esi) + : "0" (n / 4), "1" (edi), "2" (esi) + : "memory" ); } else { /* small block: don't clobber ecx + smaller code */ - if (n >= 4*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 3*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 2*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 1*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); + if (n >= 4 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 3 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 2 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 1 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); } switch (n % 4) { /* tail */ - case 0: return to; - case 1: __asm__ __volatile__("movsb" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; - case 2: __asm__ __volatile__("movsw" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; - default: __asm__ __volatile__("movsw\n\tmovsb" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; + case 0: + return to; + case 1: + asm volatile("movsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + case 2: + asm volatile("movsw" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + default: + asm volatile("movsw\n\tmovsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; } } @@ -117,87 +152,86 @@ static __always_inline void * __constant_memcpy(void * to, const void * from, si * This CPU favours 3DNow strongly (eg AMD Athlon) */ -static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) +static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) { if (len < 512) return __constant_memcpy(to, from, len); return _mmx_memcpy(to, from, len); } -static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) +static inline void *__memcpy3d(void *to, const void *from, size_t len) { if (len < 512) return __memcpy(to, from, len); return _mmx_memcpy(to, from, len); } -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy3d((t),(f),(n)) : \ - __memcpy3d((t),(f),(n))) +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy3d((t), (f), (n)) \ + : __memcpy3d((t), (f), (n))) #else /* * No 3D Now! */ - -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy((t),(f),(n)) : \ - __memcpy((t),(f),(n))) + +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy((t), (f), (n)) \ + : __memcpy((t), (f), (n))) #endif #define __HAVE_ARCH_MEMMOVE -void *memmove(void * dest,const void * src, size_t n); +void *memmove(void *dest, const void *src, size_t n); #define memcmp __builtin_memcmp #define __HAVE_ARCH_MEMCHR -extern void *memchr(const void * cs,int c,size_t count); +extern void *memchr(const void *cs, int c, size_t count); -static inline void * __memset_generic(void * s, char c,size_t count) +static inline void *__memset_generic(void *s, char c, size_t count) { -int d0, d1; -__asm__ __volatile__( - "rep\n\t" - "stosb" - : "=&c" (d0), "=&D" (d1) - :"a" (c),"1" (s),"0" (count) - :"memory"); -return s; + int d0, d1; + asm volatile("rep\n\t" + "stosb" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "1" (s), "0" (count) + : "memory"); + return s; } /* we might want to write optimized versions of these later */ -#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) +#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) /* - * memset(x,0,y) is a reasonably common thing to do, so we want to fill + * memset(x, 0, y) is a reasonably common thing to do, so we want to fill * things 32 bits at a time even when we don't know the size of the * area at compile-time.. */ -static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count) +static __always_inline +void *__constant_c_memset(void *s, unsigned long c, size_t count) { -int d0, d1; -__asm__ __volatile__( - "rep ; stosl\n\t" - "testb $2,%b3\n\t" - "je 1f\n\t" - "stosw\n" - "1:\ttestb $1,%b3\n\t" - "je 2f\n\t" - "stosb\n" - "2:" - :"=&c" (d0), "=&D" (d1) - :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) - :"memory"); -return (s); + int d0, d1; + asm volatile("rep ; stosl\n\t" + "testb $2,%b3\n\t" + "je 1f\n\t" + "stosw\n" + "1:\ttestb $1,%b3\n\t" + "je 2f\n\t" + "stosb\n" + "2:" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) + : "memory"); + return s; } /* Added by Gertjan van Wingerde to make minix and sysv module work */ #define __HAVE_ARCH_STRNLEN -extern size_t strnlen(const char * s, size_t count); +extern size_t strnlen(const char *s, size_t count); /* end of additional stuff */ #define __HAVE_ARCH_STRSTR @@ -207,66 +241,85 @@ extern char *strstr(const char *cs, const char *ct); * This looks horribly ugly, but the compiler can optimize it totally, * as we by now know that both pattern and count is constant.. */ -static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) +static __always_inline +void *__constant_c_and_count_memset(void *s, unsigned long pattern, + size_t count) { switch (count) { + case 0: + return s; + case 1: + *(unsigned char *)s = pattern & 0xff; + return s; + case 2: + *(unsigned short *)s = pattern & 0xffff; + return s; + case 3: + *(unsigned short *)s = pattern & 0xffff; + *((unsigned char *)s + 2) = pattern & 0xff; + return s; + case 4: + *(unsigned long *)s = pattern; + return s; + } + +#define COMMON(x) \ + asm volatile("rep ; stosl" \ + x \ + : "=&c" (d0), "=&D" (d1) \ + : "a" (eax), "0" (count/4), "1" ((long)s) \ + : "memory") + + { + int d0, d1; +#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 + /* Workaround for broken gcc 4.0 */ + register unsigned long eax asm("%eax") = pattern; +#else + unsigned long eax = pattern; +#endif + + switch (count % 4) { case 0: + COMMON(""); return s; case 1: - *(unsigned char *)s = pattern & 0xff; + COMMON("\n\tstosb"); return s; case 2: - *(unsigned short *)s = pattern & 0xffff; + COMMON("\n\tstosw"); return s; - case 3: - *(unsigned short *)s = pattern & 0xffff; - *(2+(unsigned char *)s) = pattern & 0xff; - return s; - case 4: - *(unsigned long *)s = pattern; + default: + COMMON("\n\tstosw\n\tstosb"); return s; + } } -#define COMMON(x) \ -__asm__ __volatile__( \ - "rep ; stosl" \ - x \ - : "=&c" (d0), "=&D" (d1) \ - : "a" (pattern),"0" (count/4),"1" ((long) s) \ - : "memory") -{ - int d0, d1; - switch (count % 4) { - case 0: COMMON(""); return s; - case 1: COMMON("\n\tstosb"); return s; - case 2: COMMON("\n\tstosw"); return s; - default: COMMON("\n\tstosw\n\tstosb"); return s; - } -} - + #undef COMMON } -#define __constant_c_x_memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_c_and_count_memset((s),(c),(count)) : \ - __constant_c_memset((s),(c),(count))) +#define __constant_c_x_memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_c_and_count_memset((s), (c), (count)) \ + : __constant_c_memset((s), (c), (count))) -#define __memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_count_memset((s),(c),(count)) : \ - __memset_generic((s),(c),(count))) +#define __memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_count_memset((s), (c), (count)) \ + : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET -#define memset(s, c, count) \ -(__builtin_constant_p(c) ? \ - __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ - __memset((s),(c),(count))) +#define memset(s, c, count) \ + (__builtin_constant_p(c) \ + ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ + (count)) \ + : __memset((s), (c), (count))) /* * find the first occurrence of byte 'c', or 1 past the area if none */ #define __HAVE_ARCH_MEMSCAN -extern void *memscan(void * addr, int c, size_t size); +extern void *memscan(void *addr, int c, size_t size); #endif /* __KERNEL__ */ diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h index 24e1c080aa8a..8675c6782a7d 100644 --- a/include/asm-x86/suspend_32.h +++ b/include/asm-x86/suspend_32.h @@ -3,6 +3,9 @@ * Based on code * Copyright 2001 Patrick Mochel <[email protected]> */ +#ifndef __ASM_X86_32_SUSPEND_H +#define __ASM_X86_32_SUSPEND_H + #include <asm/desc.h> #include <asm/i387.h> @@ -44,3 +47,5 @@ static inline void acpi_save_register_state(unsigned long return_point) /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); #endif + +#endif /* __ASM_X86_32_SUSPEND_H */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd79b29..c4946c5964bf 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -136,7 +136,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) -extern void load_gs_index(unsigned); +extern void native_load_gs_index(unsigned); /* * Load a segment. Fall back on loading the zero @@ -153,14 +153,14 @@ extern void load_gs_index(unsigned); "jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE(1b,3b) \ - : :"r" (value), "r" (0)) + : :"r" (value), "r" (0) : "memory") /* * Save a segment register away */ #define savesegment(seg, value) \ - asm volatile("mov %%" #seg ",%0":"=rm" (value)) + asm("mov %%" #seg ",%0":"=rm" (value) : : "memory") static inline unsigned long get_limit(unsigned long segment) { @@ -282,6 +282,7 @@ static inline void native_wbinvd(void) #ifdef CONFIG_X86_64 #define read_cr8() (native_read_cr8()) #define write_cr8(x) (native_write_cr8(x)) +#define load_gs_index native_load_gs_index #endif /* Clear the 'TS' bit */ @@ -289,7 +290,7 @@ static inline void native_wbinvd(void) #endif/* CONFIG_PARAVIRT */ -#define stts() write_cr0(8 | read_cr0()) +#define stts() write_cr0(read_cr0() | X86_CR0_TS) #endif /* __KERNEL__ */ @@ -303,7 +304,6 @@ static inline void clflush(volatile void *__p) void disable_hlt(void); void enable_hlt(void); -extern int es7000_plat; void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 77244f17993f..895339d2bc0b 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -1,9 +1,253 @@ +/* thread_info.h: low-level thread information + * + * Copyright (C) 2002 David Howells ([email protected]) + * - Incorporating suggestions made by Linus Torvalds and Dave Miller + */ + #ifndef _ASM_X86_THREAD_INFO_H +#define _ASM_X86_THREAD_INFO_H + +#include <linux/compiler.h> +#include <asm/page.h> +#include <asm/types.h> + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + */ +#ifndef __ASSEMBLY__ +struct task_struct; +struct exec_domain; +#include <asm/processor.h> + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + mm_segment_t addr_limit; + struct restart_block restart_block; + void __user *sysenter_return; +#ifdef CONFIG_X86_32 + unsigned long previous_esp; /* ESP of the previous stack in + case of nested (IRQ) stacks + */ + __u8 supervisor_stack[0]; +#endif +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +#else /* !__ASSEMBLY__ */ + +#include <asm/asm-offsets.h> + +#endif + +/* + * thread information flags + * - these are process state flags that various assembly files + * may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + * Warning: layout of LSW is hardcoded in entry.S + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_IRET 5 /* force IRET */ #ifdef CONFIG_X86_32 -# include "thread_info_32.h" +#define TIF_SYSCALL_EMU 6 /* syscall emulation active */ +#endif +#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ +#define TIF_SECCOMP 8 /* secure computing */ +#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ +#define TIF_NOTSC 16 /* TSC is not accessible in userland */ +#define TIF_IA32 17 /* 32bit process */ +#define TIF_FORK 18 /* ret_from_fork */ +#define TIF_ABI_PENDING 19 +#define TIF_MEMDIE 20 +#define TIF_DEBUG 21 /* uses debug registers */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_FREEZE 23 /* is freezing for suspend */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_IRET (1 << TIF_IRET) +#ifdef CONFIG_X86_32 +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #else -# include "thread_info_64.h" +#define _TIF_SYSCALL_EMU 0 #endif +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) +#define _TIF_NOTSC (1 << TIF_NOTSC) +#define _TIF_IA32 (1 << TIF_IA32) +#define _TIF_FORK (1 << TIF_FORK) +#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & \ + ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP| \ + _TIF_SECCOMP|_TIF_SYSCALL_EMU)) + +/* work to do on any return to user space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +/* Only used for 64 bit */ +#define _TIF_DO_NOTIFY_MASK \ + (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ + _TIF_NOTSC) + +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) + +#define PREEMPT_ACTIVE 0x10000000 + +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) +#else +#define THREAD_FLAGS GFP_KERNEL +#endif + +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) + +#ifdef CONFIG_X86_32 + +#define STACK_WARN (THREAD_SIZE/8) +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ + + +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *) + (current_stack_pointer & ~(THREAD_SIZE - 1)); +} + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movl $-THREAD_SIZE, reg; \ + andl %esp, reg + +/* use this one if reg already contains %esp */ +#define GET_THREAD_INFO_WITH_ESP(reg) \ + andl $-THREAD_SIZE, reg + +#endif + +#else /* X86_32 */ + +#include <asm/pda.h> + +/* + * macros/functions for gaining access to the thread information structure + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ +static inline struct thread_info *current_thread_info(void) +{ + struct thread_info *ti; + ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); + return ti; +} + +/* do not use in interrupt context */ +static inline struct thread_info *stack_thread_info(void) +{ + struct thread_info *ti; + asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + return ti; +} + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movq %gs:pda_kernelstack,reg ; \ + subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + +#endif + +#endif /* !X86_32 */ + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_USEDFPU 0x0001 /* FPU was used by this task + this quantum (SMP) */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +#define TS_POLLING 0x0004 /* true if in idle loop + and not sleeping */ +#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ + +#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) + +#ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); +} +#endif /* !__ASSEMBLY__ */ #ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h deleted file mode 100644 index b6338829d1a8..000000000000 --- a/include/asm-x86/thread_info_32.h +++ /dev/null @@ -1,205 +0,0 @@ -/* thread_info.h: i386 low-level thread information - * - * Copyright (C) 2002 David Howells ([email protected]) - * - Incorporating suggestions made by Linus Torvalds and Dave Miller - */ - -#ifndef _ASM_THREAD_INFO_H -#define _ASM_THREAD_INFO_H - -#ifdef __KERNEL__ - -#include <linux/compiler.h> -#include <asm/page.h> - -#ifndef __ASSEMBLY__ -#include <asm/processor.h> -#endif - -/* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - * - if the contents of this structure are changed, - * the assembly constants must also be changed - */ -#ifndef __ASSEMBLY__ - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - unsigned long flags; /* low level flags */ - unsigned long status; /* thread-synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; /* thread address space: - 0-0xBFFFFFFF user-thread - 0-0xFFFFFFFF kernel-thread - */ - void *sysenter_return; - struct restart_block restart_block; - unsigned long previous_esp; /* ESP of the previous stack in - case of nested (IRQ) stacks - */ - __u8 supervisor_stack[0]; -}; - -#else /* !__ASSEMBLY__ */ - -#include <asm/asm-offsets.h> - -#endif - -#define PREEMPT_ACTIVE 0x10000000 -#ifdef CONFIG_4KSTACKS -#define THREAD_SIZE (4096) -#else -#define THREAD_SIZE (8192) -#endif - -#define STACK_WARN (THREAD_SIZE/8) -/* - * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - - -/* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE))) -#else -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) -#endif - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movl $-THREAD_SIZE, reg; \ - andl %esp, reg - -/* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ - andl $-THREAD_SIZE, reg - -#endif - -/* - * thread information flags - * - these are process state flags that various - * assembly files may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SINGLESTEP 3 /* restore singlestep on return to - user mode */ -#define TIF_IRET 4 /* return with iret */ -#define TIF_SYSCALL_EMU 5 /* syscall emulation active */ -#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ -#define TIF_SECCOMP 7 /* secure computing */ -#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ -#define TIF_MEMDIE 16 -#define TIF_DEBUG 17 /* uses debug registers */ -#define TIF_IO_BITMAP 18 /* uses I/O bitmap */ -#define TIF_FREEZE 19 /* is freezing for suspend */ -#define TIF_NOTSC 20 /* TSC is not accessible in userland */ -#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) - -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_EMU)) -/* work to do on any return to u-space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ - _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) - - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_POLLING 0x0002 /* True if in idle loop - and not sleeping */ -#define TS_RESTORE_SIGMASK 0x0004 /* restore signal mask in do_signal() */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); -} -#endif /* !__ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* _ASM_THREAD_INFO_H */ diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h deleted file mode 100644 index cb69f70abba1..000000000000 --- a/include/asm-x86/thread_info_64.h +++ /dev/null @@ -1,195 +0,0 @@ -/* thread_info.h: x86_64 low-level thread information - * - * Copyright (C) 2002 David Howells ([email protected]) - * - Incorporating suggestions made by Linus Torvalds and Dave Miller - */ - -#ifndef _ASM_THREAD_INFO_H -#define _ASM_THREAD_INFO_H - -#ifdef __KERNEL__ - -#include <asm/page.h> -#include <asm/types.h> -#include <asm/pda.h> - -/* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - */ -#ifndef __ASSEMBLY__ -struct task_struct; -struct exec_domain; -#include <asm/processor.h> - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - __u32 flags; /* low level flags */ - __u32 status; /* thread synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; - struct restart_block restart_block; -#ifdef CONFIG_IA32_EMULATION - void __user *sysenter_return; -#endif -}; -#endif - -/* - * macros/functions for gaining access to the thread information structure - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} - -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) -{ - struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); - return ti; -} - -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) -#else -#define THREAD_FLAGS GFP_KERNEL -#endif - -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg - -#endif - -/* - * thread information flags - * - these are process state flags that various assembly files - * may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - * Warning: layout of LSW is hardcoded in entry.S - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -#define TIF_SECCOMP 8 /* secure computing */ -#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ -/* 16 free */ -#define TIF_IA32 17 /* 32bit process */ -#define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 -#define TIF_MEMDIE 20 -#define TIF_DEBUG 21 /* uses debug registers */ -#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -#define TIF_FREEZE 23 /* is freezing for suspend */ -#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ -#define TIF_NOTSC 28 /* TSC is not accessible in userland */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) -#define _TIF_NOTSC (1 << TIF_NOTSC) - -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & \ - ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP)) -/* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -#define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) - -#define PREEMPT_ACTIVE 0x10000000 - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_COMPAT 0x0002 /* 32bit syscall active */ -#define TS_POLLING 0x0004 /* true if in idle loop - and not sleeping */ -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); -} -#endif /* !__ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* _ASM_THREAD_INFO_H */ diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index bce72d7a958c..a17fa473e91d 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -56,4 +56,6 @@ static inline int native_set_wallclock(unsigned long nowtime) #endif /* CONFIG_PARAVIRT */ +extern unsigned long __init calibrate_cpu(void); + #endif diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index 4f6fcb050c11..fb2a4ddddf3d 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -7,14 +7,14 @@ #define TICK_SIZE (tick_nsec / 1000) unsigned long long native_sched_clock(void); -unsigned long native_calculate_cpu_khz(void); +unsigned long native_calibrate_tsc(void); extern int timer_ack; extern int no_timer_check; extern int recalibrate_cpu_khz(void); #ifndef CONFIG_PARAVIRT -#define calculate_cpu_khz() native_calculate_cpu_khz() +#define calibrate_tsc() native_calibrate_tsc() #endif /* Accelerators for sched_clock() diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index dcf3f8131d6b..98e5f17ea856 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -35,79 +35,93 @@ # endif #endif +/* Node not present */ +#define NUMA_NO_NODE (-1) + #ifdef CONFIG_NUMA #include <linux/cpumask.h> #include <asm/mpspec.h> -/* Mappings between logical cpu number and node number */ #ifdef CONFIG_X86_32 -extern int cpu_to_node_map[]; -#else -/* Returns the number of the current Node. */ -#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id())) -#endif - -DECLARE_PER_CPU(int, x86_cpu_to_node_map); - -#ifdef CONFIG_SMP -extern int x86_cpu_to_node_map_init[]; -extern void *x86_cpu_to_node_map_early_ptr; -#else -#define x86_cpu_to_node_map_early_ptr NULL -#endif +/* Mappings between node number and cpus on that node. */ extern cpumask_t node_to_cpumask_map[]; -#define NUMA_NO_NODE (-1) +/* Mappings between logical cpu number and node number */ +extern int cpu_to_node_map[]; /* Returns the number of the node containing CPU 'cpu' */ -#ifdef CONFIG_X86_32 -#define early_cpu_to_node(cpu) cpu_to_node(cpu) static inline int cpu_to_node(int cpu) { return cpu_to_node_map[cpu]; } +#define early_cpu_to_node(cpu) cpu_to_node(cpu) -#else /* CONFIG_X86_64 */ - -#ifdef CONFIG_SMP -static inline int early_cpu_to_node(int cpu) +/* Returns a bitmask of CPUs on Node 'node'. + * + * Side note: this function creates the returned cpumask on the stack + * so with a high NR_CPUS count, excessive stack space is used. The + * node_to_cpumask_ptr function should be used whenever possible. + */ +static inline cpumask_t node_to_cpumask(int node) { - int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; - - if (cpu_to_node_map) - return cpu_to_node_map[cpu]; - else if (per_cpu_offset(cpu)) - return per_cpu(x86_cpu_to_node_map, cpu); - else - return NUMA_NO_NODE; + return node_to_cpumask_map[node]; } -#else -#define early_cpu_to_node(cpu) cpu_to_node(cpu) -#endif +#else /* CONFIG_X86_64 */ + +/* Mappings between node number and cpus on that node. */ +extern cpumask_t *node_to_cpumask_map; + +/* Mappings between logical cpu number and node number */ +DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); + +/* Returns the number of the current Node. */ +#define numa_node_id() read_pda(nodenumber) + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +extern int cpu_to_node(int cpu); +extern int early_cpu_to_node(int cpu); +extern cpumask_t *_node_to_cpumask_ptr(int node); +extern cpumask_t node_to_cpumask(int node); + +#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Returns the number of the node containing CPU 'cpu' */ static inline int cpu_to_node(int cpu) { -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (x86_cpu_to_node_map_early_ptr) { - printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n", - (int)cpu); - dump_stack(); - return ((int *)x86_cpu_to_node_map_early_ptr)[cpu]; - } -#endif return per_cpu(x86_cpu_to_node_map, cpu); } -#ifdef CONFIG_NUMA +/* Same function but used if called before per_cpu areas are setup */ +static inline int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + return per_cpu(x86_cpu_to_node_map, cpu); +} /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +static inline cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &node_to_cpumask_map[node]; +} + +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline cpumask_t node_to_cpumask(int node) +{ + return node_to_cpumask_map[node]; +} + +#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Replace default node_to_cpumask_ptr with optimized version */ #define node_to_cpumask_ptr(v, node) \ - cpumask_t *v = &(node_to_cpumask_map[node]) + cpumask_t *v = _node_to_cpumask_ptr(node) #define node_to_cpumask_ptr_next(v, node) \ - v = &(node_to_cpumask_map[node]) -#endif + v = _node_to_cpumask_ptr(node) #endif /* CONFIG_X86_64 */ @@ -117,20 +131,6 @@ static inline int cpu_to_node(int cpu) */ #define parent_node(node) (node) -/* Returns a bitmask of CPUs on Node 'node'. */ -static inline cpumask_t node_to_cpumask(int node) -{ - return node_to_cpumask_map[node]; -} - -/* Returns the number of the first CPU on Node 'node'. */ -static inline int node_to_first_cpu(int node) -{ - cpumask_t mask = node_to_cpumask(node); - - return first_cpu(mask); -} - #define pcibus_to_node(bus) __pcibus_to_node(bus) #define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) @@ -180,12 +180,44 @@ extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) #endif -#else /* CONFIG_NUMA */ +#else /* !CONFIG_NUMA */ +#define numa_node_id() 0 +#define cpu_to_node(cpu) 0 +#define early_cpu_to_node(cpu) 0 + +static inline cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &cpu_online_map; +} +static inline cpumask_t node_to_cpumask(int node) +{ + return cpu_online_map; +} +static inline int node_to_first_cpu(int node) +{ + return first_cpu(cpu_online_map); +} + +/* Replace default node_to_cpumask_ptr with optimized version */ +#define node_to_cpumask_ptr(v, node) \ + cpumask_t *v = _node_to_cpumask_ptr(node) + +#define node_to_cpumask_ptr_next(v, node) \ + v = _node_to_cpumask_ptr(node) #endif #include <asm-generic/topology.h> +#ifdef CONFIG_NUMA +/* Returns the number of the first CPU on Node 'node'. */ +static inline int node_to_first_cpu(int node) +{ + node_to_cpumask_ptr(mask, node); + return first_cpu(*mask); +} +#endif + extern cpumask_t cpu_coregroup_map(int cpu); #ifdef ENABLE_TOPO_DEFINES @@ -193,6 +225,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) + +/* indicates that pointers to the topology cpumask_t maps are valid */ +#define arch_provides_topology_pointers yes #endif static inline void arch_fix_phys_package_id(int num, u32 slot) @@ -220,4 +255,4 @@ static inline void set_mp_bus_to_node(int busnum, int node) } #endif -#endif +#endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index 548873ab5fc1..cb6f6ee45b8f 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -48,7 +48,6 @@ static __always_inline cycles_t vget_cycles(void) extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); -extern void init_tsc_clocksource(void); int check_tsc_unstable(void); /* @@ -58,7 +57,6 @@ int check_tsc_unstable(void); extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); -extern void tsc_calibrate(void); extern int notsc_setup(char *); #endif diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index 9fefd2947e78..f6fa4d841bbc 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -1,5 +1,453 @@ +#ifndef _ASM_UACCES_H_ +#define _ASM_UACCES_H_ +/* + * User space memory access functions + */ +#include <linux/errno.h> +#include <linux/compiler.h> +#include <linux/thread_info.h> +#include <linux/prefetch.h> +#include <linux/string.h> +#include <asm/asm.h> +#include <asm/page.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#define KERNEL_DS MAKE_MM_SEG(-1UL) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define segment_eq(a, b) ((a).seg == (b).seg) + +#define __addr_ok(addr) \ + ((unsigned long __force)(addr) < \ + (current_thread_info()->addr_limit.seg)) + +/* + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. + * + * This is equivalent to the following test: + * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) + * + * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... + */ + +#define __range_not_ok(addr, size) \ +({ \ + unsigned long flag, roksum; \ + __chk_user_ptr(addr); \ + asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ + : "=&r" (flag), "=r" (roksum) \ + : "1" (addr), "g" ((long)(size)), \ + "rm" (current_thread_info()->addr_limit.seg)); \ + flag; \ +}) + +/** + * access_ok: - Checks if a user space pointer is valid + * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that + * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe + * to write to a block, it is always safe to read from it. + * @addr: User space pointer to start of block to check + * @size: Size of block to check + * + * Context: User context only. This function may sleep. + * + * Checks if a pointer to a block of memory in user space is valid. + * + * Returns true (nonzero) if the memory block may be valid, false (zero) + * if it is definitely invalid. + * + * Note that, depending on architecture, this function probably just + * checks that the pointer is in the user space range - after calling + * this function, memory access functions may still return -EFAULT. + */ +#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + unsigned long insn, fixup; +}; + +extern int fixup_exception(struct pt_regs *regs); + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * This gets kind of ugly. We want to return _two_ values in "get_user()" + * and yet we don't want to do any pointers, because that is too much + * of a performance impact. Thus we have a few rather ugly macros here, + * and hide all the ugliness from the user. + * + * The "__xxx" versions of the user access functions are versions that + * do not verify the address space, that must have been done previously + * with a separate "access_ok()" call (this is used when we do multiple + * accesses to the same area of user memory). + */ + +extern int __get_user_1(void); +extern int __get_user_2(void); +extern int __get_user_4(void); +extern int __get_user_8(void); +extern int __get_user_bad(void); + +#define __get_user_x(size, ret, x, ptr) \ + asm volatile("call __get_user_" #size \ + : "=a" (ret),"=d" (x) \ + : "0" (ptr)) \ + +/* Careful: we have to cast the result to the type of the pointer + * for sign reasons */ + +/** + * get_user: - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#ifdef CONFIG_X86_32 +#define __get_user_8(__ret_gu, __val_gu, ptr) \ + __get_user_x(X, __ret_gu, __val_gu, ptr) +#else +#define __get_user_8(__ret_gu, __val_gu, ptr) \ + __get_user_x(8, __ret_gu, __val_gu, ptr) +#endif + +#define get_user(x, ptr) \ +({ \ + int __ret_gu; \ + unsigned long __val_gu; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __get_user_x(1, __ret_gu, __val_gu, ptr); \ + break; \ + case 2: \ + __get_user_x(2, __ret_gu, __val_gu, ptr); \ + break; \ + case 4: \ + __get_user_x(4, __ret_gu, __val_gu, ptr); \ + break; \ + case 8: \ + __get_user_8(__ret_gu, __val_gu, ptr); \ + break; \ + default: \ + __get_user_x(X, __ret_gu, __val_gu, ptr); \ + break; \ + } \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; \ +}) + +#define __put_user_x(size, x, ptr, __ret_pu) \ + asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ + :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + + + +#ifdef CONFIG_X86_32 +#define __put_user_u64(x, addr, err) \ + asm volatile("1: movl %%eax,0(%2)\n" \ + "2: movl %%edx,4(%2)\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: movl %3,%0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (err) \ + : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) + +#define __put_user_x8(x, ptr, __ret_pu) \ + asm volatile("call __put_user_8" : "=a" (__ret_pu) \ + : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") +#else +#define __put_user_u64(x, ptr, retval) \ + __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) +#endif + +extern void __put_user_bad(void); + +/* + * Strange magic calling convention: pointer in %ecx, + * value in %eax(:%edx), return value in %eax. clobbers %rbx + */ +extern void __put_user_1(void); +extern void __put_user_2(void); +extern void __put_user_4(void); +extern void __put_user_8(void); + +#ifdef CONFIG_X86_WP_WORKS_OK + +/** + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. + */ +#define put_user(x, ptr) \ +({ \ + int __ret_pu; \ + __typeof__(*(ptr)) __pu_val; \ + __chk_user_ptr(ptr); \ + __pu_val = x; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __put_user_x(1, __pu_val, ptr, __ret_pu); \ + break; \ + case 2: \ + __put_user_x(2, __pu_val, ptr, __ret_pu); \ + break; \ + case 4: \ + __put_user_x(4, __pu_val, ptr, __ret_pu); \ + break; \ + case 8: \ + __put_user_x8(__pu_val, ptr, __ret_pu); \ + break; \ + default: \ + __put_user_x(X, __pu_val, ptr, __ret_pu); \ + break; \ + } \ + __ret_pu; \ +}) + +#define __put_user_size(x, ptr, size, retval, errret) \ +do { \ + retval = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \ + break; \ + case 2: \ + __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ + break; \ + case 4: \ + __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\ + break; \ + case 8: \ + __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ + break; \ + default: \ + __put_user_bad(); \ + } \ +} while (0) + +#else + +#define __put_user_size(x, ptr, size, retval, errret) \ +do { \ + __typeof__(*(ptr))__pus_tmp = x; \ + retval = 0; \ + \ + if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \ + retval = errret; \ +} while (0) + +#define put_user(x, ptr) \ +({ \ + int __ret_pu; \ + __typeof__(*(ptr))__pus_tmp = x; \ + __ret_pu = 0; \ + if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \ + sizeof(*(ptr))) != 0)) \ + __ret_pu = -EFAULT; \ + __ret_pu; \ +}) +#endif + +#ifdef CONFIG_X86_32 +#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#else +#define __get_user_asm_u64(x, ptr, retval, errret) \ + __get_user_asm(x, ptr, retval, "q", "", "=r", errret) +#endif + +#define __get_user_size(x, ptr, size, retval, errret) \ +do { \ + retval = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ + break; \ + case 2: \ + __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ + break; \ + case 4: \ + __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ + break; \ + case 8: \ + __get_user_asm_u64(x, ptr, retval, errret); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov %3,%0\n" \ + " xor"itype" %"rtype"1,%"rtype"1\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (err), ltype(x) \ + : "m" (__m(addr)), "i" (errret), "0" (err)) + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + long __pu_err; \ + __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ + __pu_err; \ +}) + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + long __gu_err; \ + unsigned long __gu_val; \ + __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +/* FIXME: this hack is definitely wrong -AK */ +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct __user *)(x)) + +/* + * Tell gcc we read from memory instead of writing: this is because + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. + */ +#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + asm volatile("1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov %3,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r"(err) \ + : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) +/** + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ + +#define __get_user(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr))) +/** + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + */ + +#define __put_user(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + +#define __get_user_unaligned __get_user +#define __put_user_unaligned __put_user + +/* + * movsl can be slow when source and dest are not both 8-byte aligned + */ +#ifdef CONFIG_X86_INTEL_USERCOPY +extern struct movsl_mask { + int mask; +} ____cacheline_aligned_in_smp movsl_mask; +#endif + +#define ARCH_HAS_NOCACHE_UACCESS 1 + #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else +# define ARCH_HAS_SEARCH_EXTABLE # include "uaccess_64.h" #endif + +#endif diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h index 8e7595c1f34e..6fdef39a0bcb 100644 --- a/include/asm-x86/uaccess_32.h +++ b/include/asm-x86/uaccess_32.h @@ -11,426 +11,6 @@ #include <asm/asm.h> #include <asm/page.h> -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 - -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - - -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFUL) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) - -#define segment_eq(a, b) ((a).seg == (b).seg) - -/* - * movsl can be slow when source and dest are not both 8-byte aligned - */ -#ifdef CONFIG_X86_INTEL_USERCOPY -extern struct movsl_mask { - int mask; -} ____cacheline_aligned_in_smp movsl_mask; -#endif - -#define __addr_ok(addr) \ - ((unsigned long __force)(addr) < \ - (current_thread_info()->addr_limit.seg)) - -/* - * Test whether a block of memory is a valid user space address. - * Returns 0 if the range is valid, nonzero otherwise. - * - * This is equivalent to the following test: - * (u33)addr + (u33)size >= (u33)current->addr_limit.seg - * - * This needs 33-bit arithmetic. We have a carry... - */ -#define __range_ok(addr, size) \ -({ \ - unsigned long flag, roksum; \ - __chk_user_ptr(addr); \ - asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \ - :"=&r" (flag), "=r" (roksum) \ - :"1" (addr), "g" ((int)(size)), \ - "rm" (current_thread_info()->addr_limit.seg)); \ - flag; \ -}) - -/** - * access_ok: - Checks if a user space pointer is valid - * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that - * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe - * to write to a block, it is always safe to read from it. - * @addr: User space pointer to start of block to check - * @size: Size of block to check - * - * Context: User context only. This function may sleep. - * - * Checks if a pointer to a block of memory in user space is valid. - * - * Returns true (nonzero) if the memory block may be valid, false (zero) - * if it is definitely invalid. - * - * Note that, depending on architecture, this function probably just - * checks that the pointer is in the user space range - after calling - * this function, memory access functions may still return -EFAULT. - */ -#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0)) - -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry { - unsigned long insn, fixup; -}; - -extern int fixup_exception(struct pt_regs *regs); - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - * - * This gets kind of ugly. We want to return _two_ values in "get_user()" - * and yet we don't want to do any pointers, because that is too much - * of a performance impact. Thus we have a few rather ugly macros here, - * and hide all the ugliness from the user. - * - * The "__xxx" versions of the user access functions are versions that - * do not verify the address space, that must have been done previously - * with a separate "access_ok()" call (this is used when we do multiple - * accesses to the same area of user memory). - */ - -extern void __get_user_1(void); -extern void __get_user_2(void); -extern void __get_user_4(void); - -#define __get_user_x(size, ret, x, ptr) \ - asm volatile("call __get_user_" #size \ - :"=a" (ret),"=d" (x) \ - :"0" (ptr)) - - -/* Careful: we have to cast the result to the type of the pointer - * for sign reasons */ - -/** - * get_user: - Get a simple variable from user space. - * @x: Variable to store result. - * @ptr: Source address, in user space. - * - * Context: User context only. This function may sleep. - * - * This macro copies a single simple variable from user space to kernel - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and the result of - * dereferencing @ptr must be assignable to @x without a cast. - * - * Returns zero on success, or -EFAULT on error. - * On error, the variable @x is set to zero. - */ -#define get_user(x, ptr) \ -({ \ - int __ret_gu; \ - unsigned long __val_gu; \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __get_user_x(1, __ret_gu, __val_gu, ptr); \ - break; \ - case 2: \ - __get_user_x(2, __ret_gu, __val_gu, ptr); \ - break; \ - case 4: \ - __get_user_x(4, __ret_gu, __val_gu, ptr); \ - break; \ - default: \ - __get_user_x(X, __ret_gu, __val_gu, ptr); \ - break; \ - } \ - (x) = (__typeof__(*(ptr)))__val_gu; \ - __ret_gu; \ -}) - -extern void __put_user_bad(void); - -/* - * Strange magic calling convention: pointer in %ecx, - * value in %eax(:%edx), return value in %eax, no clobbers. - */ -extern void __put_user_1(void); -extern void __put_user_2(void); -extern void __put_user_4(void); -extern void __put_user_8(void); - -#define __put_user_1(x, ptr) \ - asm volatile("call __put_user_1" : "=a" (__ret_pu) \ - : "0" ((typeof(*(ptr)))(x)), "c" (ptr)) - -#define __put_user_2(x, ptr) \ - asm volatile("call __put_user_2" : "=a" (__ret_pu) \ - : "0" ((typeof(*(ptr)))(x)), "c" (ptr)) - -#define __put_user_4(x, ptr) \ - asm volatile("call __put_user_4" : "=a" (__ret_pu) \ - : "0" ((typeof(*(ptr)))(x)), "c" (ptr)) - -#define __put_user_8(x, ptr) \ - asm volatile("call __put_user_8" : "=a" (__ret_pu) \ - : "A" ((typeof(*(ptr)))(x)), "c" (ptr)) - -#define __put_user_X(x, ptr) \ - asm volatile("call __put_user_X" : "=a" (__ret_pu) \ - : "c" (ptr)) - -/** - * put_user: - Write a simple value into user space. - * @x: Value to copy to user space. - * @ptr: Destination address, in user space. - * - * Context: User context only. This function may sleep. - * - * This macro copies a single simple value from kernel space to user - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and @x must be assignable - * to the result of dereferencing @ptr. - * - * Returns zero on success, or -EFAULT on error. - */ -#ifdef CONFIG_X86_WP_WORKS_OK - -#define put_user(x, ptr) \ -({ \ - int __ret_pu; \ - __typeof__(*(ptr)) __pu_val; \ - __chk_user_ptr(ptr); \ - __pu_val = x; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __put_user_1(__pu_val, ptr); \ - break; \ - case 2: \ - __put_user_2(__pu_val, ptr); \ - break; \ - case 4: \ - __put_user_4(__pu_val, ptr); \ - break; \ - case 8: \ - __put_user_8(__pu_val, ptr); \ - break; \ - default: \ - __put_user_X(__pu_val, ptr); \ - break; \ - } \ - __ret_pu; \ -}) - -#else -#define put_user(x, ptr) \ -({ \ - int __ret_pu; \ - __typeof__(*(ptr))__pus_tmp = x; \ - __ret_pu = 0; \ - if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \ - sizeof(*(ptr))) != 0)) \ - __ret_pu = -EFAULT; \ - __ret_pu; \ -}) - - -#endif - -/** - * __get_user: - Get a simple variable from user space, with less checking. - * @x: Variable to store result. - * @ptr: Source address, in user space. - * - * Context: User context only. This function may sleep. - * - * This macro copies a single simple variable from user space to kernel - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and the result of - * dereferencing @ptr must be assignable to @x without a cast. - * - * Caller must check the pointer with access_ok() before calling this - * function. - * - * Returns zero on success, or -EFAULT on error. - * On error, the variable @x is set to zero. - */ -#define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) - - -/** - * __put_user: - Write a simple value into user space, with less checking. - * @x: Value to copy to user space. - * @ptr: Destination address, in user space. - * - * Context: User context only. This function may sleep. - * - * This macro copies a single simple value from kernel space to user - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and @x must be assignable - * to the result of dereferencing @ptr. - * - * Caller must check the pointer with access_ok() before calling this - * function. - * - * Returns zero on success, or -EFAULT on error. - */ -#define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __put_user_nocheck(x, ptr, size) \ -({ \ - long __pu_err; \ - __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ - __pu_err; \ -}) - - -#define __put_user_u64(x, addr, err) \ - asm volatile("1: movl %%eax,0(%2)\n" \ - "2: movl %%edx,4(%2)\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: movl %3,%0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ - : "=r" (err) \ - : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) - -#ifdef CONFIG_X86_WP_WORKS_OK - -#define __put_user_size(x, ptr, size, retval, errret) \ -do { \ - retval = 0; \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \ - break; \ - case 2: \ - __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ - break; \ - case 4: \ - __put_user_asm(x, ptr, retval, "l", "", "ir", errret); \ - break; \ - case 8: \ - __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ - break; \ - default: \ - __put_user_bad(); \ - } \ -} while (0) - -#else - -#define __put_user_size(x, ptr, size, retval, errret) \ -do { \ - __typeof__(*(ptr))__pus_tmp = x; \ - retval = 0; \ - \ - if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \ - retval = errret; \ -} while (0) - -#endif -struct __large_struct { unsigned long buf[100]; }; -#define __m(x) (*(struct __large_struct __user *)(x)) - -/* - * Tell gcc we read from memory instead of writing: this is because - * we do not write to any memory gcc knows about, so there are no - * aliasing issues. - */ -#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : "=r"(err) \ - : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err)) - - -#define __get_user_nocheck(x, ptr, size) \ -({ \ - long __gu_err; \ - unsigned long __gu_val; \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ - __gu_err; \ -}) - -extern long __get_user_bad(void); - -#define __get_user_size(x, ptr, size, retval, errret) \ -do { \ - retval = 0; \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ - break; \ - case 2: \ - __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ - break; \ - case 4: \ - __get_user_asm(x, ptr, retval, "l", "", "=r", errret); \ - break; \ - default: \ - (x) = __get_user_bad(); \ - } \ -} while (0) - -#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ - " xor"itype" %"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : "=r" (err), ltype (x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) - - unsigned long __must_check __copy_to_user_ll (void __user *to, const void *from, unsigned long n); unsigned long __must_check __copy_from_user_ll @@ -576,8 +156,6 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) return __copy_from_user_ll(to, from, n); } -#define ARCH_HAS_NOCACHE_UACCESS - static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index b8a2f4339903..515d4dce96b5 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -9,265 +9,6 @@ #include <linux/prefetch.h> #include <asm/page.h> -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 - -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFFUL) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) - -#define segment_eq(a, b) ((a).seg == (b).seg) - -#define __addr_ok(addr) (!((unsigned long)(addr) & \ - (current_thread_info()->addr_limit.seg))) - -/* - * Uhhuh, this needs 65-bit arithmetic. We have a carry.. - */ -#define __range_not_ok(addr, size) \ -({ \ - unsigned long flag, roksum; \ - __chk_user_ptr(addr); \ - asm("# range_ok\n\r" \ - "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \ - : "=&r" (flag), "=r" (roksum) \ - : "1" (addr), "g" ((long)(size)), \ - "g" (current_thread_info()->addr_limit.seg)); \ - flag; \ -}) - -#define access_ok(type, addr, size) (__range_not_ok(addr, size) == 0) - -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry { - unsigned long insn, fixup; -}; - -extern int fixup_exception(struct pt_regs *regs); - -#define ARCH_HAS_SEARCH_EXTABLE - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - * - * This gets kind of ugly. We want to return _two_ values in "get_user()" - * and yet we don't want to do any pointers, because that is too much - * of a performance impact. Thus we have a few rather ugly macros here, - * and hide all the ugliness from the user. - * - * The "__xxx" versions of the user access functions are versions that - * do not verify the address space, that must have been done previously - * with a separate "access_ok()" call (this is used when we do multiple - * accesses to the same area of user memory). - */ - -#define __get_user_x(size, ret, x, ptr) \ - asm volatile("call __get_user_" #size \ - : "=a" (ret),"=d" (x) \ - : "c" (ptr) \ - : "r8") - -/* Careful: we have to cast the result to the type of the pointer - * for sign reasons */ - -#define get_user(x, ptr) \ -({ \ - unsigned long __val_gu; \ - int __ret_gu; \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __get_user_x(1, __ret_gu, __val_gu, ptr); \ - break; \ - case 2: \ - __get_user_x(2, __ret_gu, __val_gu, ptr); \ - break; \ - case 4: \ - __get_user_x(4, __ret_gu, __val_gu, ptr); \ - break; \ - case 8: \ - __get_user_x(8, __ret_gu, __val_gu, ptr); \ - break; \ - default: \ - __get_user_bad(); \ - break; \ - } \ - (x) = (__force typeof(*(ptr)))__val_gu; \ - __ret_gu; \ -}) - -extern void __put_user_1(void); -extern void __put_user_2(void); -extern void __put_user_4(void); -extern void __put_user_8(void); -extern void __put_user_bad(void); - -#define __put_user_x(size, ret, x, ptr) \ - asm volatile("call __put_user_" #size \ - :"=a" (ret) \ - :"c" (ptr),"d" (x) \ - :"r8") - -#define put_user(x, ptr) \ - __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) -#define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __get_user_unaligned __get_user -#define __put_user_unaligned __put_user - -#define __put_user_nocheck(x, ptr, size) \ -({ \ - int __pu_err; \ - __put_user_size((x), (ptr), (size), __pu_err); \ - __pu_err; \ -}) - - -#define __put_user_check(x, ptr, size) \ -({ \ - int __pu_err; \ - typeof(*(ptr)) __user *__pu_addr = (ptr); \ - switch (size) { \ - case 1: \ - __put_user_x(1, __pu_err, x, __pu_addr); \ - break; \ - case 2: \ - __put_user_x(2, __pu_err, x, __pu_addr); \ - break; \ - case 4: \ - __put_user_x(4, __pu_err, x, __pu_addr); \ - break; \ - case 8: \ - __put_user_x(8, __pu_err, x, __pu_addr); \ - break; \ - default: \ - __put_user_bad(); \ - } \ - __pu_err; \ -}) - -#define __put_user_size(x, ptr, size, retval) \ -do { \ - retval = 0; \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __put_user_asm(x, ptr, retval, "b", "b", "iq", -EFAULT);\ - break; \ - case 2: \ - __put_user_asm(x, ptr, retval, "w", "w", "ir", -EFAULT);\ - break; \ - case 4: \ - __put_user_asm(x, ptr, retval, "l", "k", "ir", -EFAULT);\ - break; \ - case 8: \ - __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT); \ - break; \ - default: \ - __put_user_bad(); \ - } \ -} while (0) - -/* FIXME: this hack is definitely wrong -AK */ -struct __large_struct { unsigned long buf[100]; }; -#define __m(x) (*(struct __large_struct __user *)(x)) - -/* - * Tell gcc we read from memory instead of writing: this is because - * we do not write to any memory gcc knows about, so there are no - * aliasing issues. - */ -#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \ - asm volatile("1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ - ".section .fixup, \"ax\"\n" \ - "3: mov %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : "=r"(err) \ - : ltype (x), "m" (__m(addr)), "i" (errno), "0" (err)) - - -#define __get_user_nocheck(x, ptr, size) \ -({ \ - int __gu_err; \ - unsigned long __gu_val; \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ - (x) = (__force typeof(*(ptr)))__gu_val; \ - __gu_err; \ -}) - -extern int __get_user_1(void); -extern int __get_user_2(void); -extern int __get_user_4(void); -extern int __get_user_8(void); -extern int __get_user_bad(void); - -#define __get_user_size(x, ptr, size, retval) \ -do { \ - retval = 0; \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __get_user_asm(x, ptr, retval, "b", "b", "=q", -EFAULT);\ - break; \ - case 2: \ - __get_user_asm(x, ptr, retval, "w", "w", "=r", -EFAULT);\ - break; \ - case 4: \ - __get_user_asm(x, ptr, retval, "l", "k", "=r", -EFAULT);\ - break; \ - case 8: \ - __get_user_asm(x, ptr, retval, "q", "", "=r", -EFAULT); \ - break; \ - default: \ - (x) = __get_user_bad(); \ - } \ -} while (0) - -#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \ - asm volatile("1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup, \"ax\"\n" \ - "3: mov %3,%0\n" \ - " xor"itype" %"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : "=r" (err), ltype (x) \ - : "m" (__m(addr)), "i"(errno), "0"(err)) - /* * Copy To/From Userspace */ @@ -437,7 +178,6 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) return copy_user_generic((__force void *)dst, src, size); } -#define ARCH_HAS_NOCACHE_UACCESS 1 extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); @@ -455,4 +195,7 @@ static inline int __copy_from_user_inatomic_nocache(void *dst, return __copy_user_nocache(dst, src, size, 0); } +unsigned long +copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); + #endif /* __X86_64_UACCESS_H */ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index fe26e36d0f51..9c1a4a3470d9 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -290,7 +290,7 @@ __SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) #define __NR_rt_sigqueueinfo 129 __SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) #define __NR_rt_sigsuspend 130 -__SYSCALL(__NR_rt_sigsuspend, stub_rt_sigsuspend) +__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) #define __NR_sigaltstack 131 __SYSCALL(__NR_sigaltstack, stub_sigaltstack) #define __NR_utime 132 diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h new file mode 100644 index 000000000000..91ac0dfb7588 --- /dev/null +++ b/include/asm-x86/uv/uv_bau.h @@ -0,0 +1,337 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV Broadcast Assist Unit definitions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef __ASM_X86_UV_BAU__ +#define __ASM_X86_UV_BAU__ + +#include <linux/bitmap.h> +#define BITSPERBYTE 8 + +/* + * Broadcast Assist Unit messaging structures + * + * Selective Broadcast activations are induced by software action + * specifying a particular 8-descriptor "set" via a 6-bit index written + * to an MMR. + * Thus there are 64 unique 512-byte sets of SB descriptors - one set for + * each 6-bit index value. These descriptor sets are mapped in sequence + * starting with set 0 located at the address specified in the + * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, + * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. + * + * We will use 31 sets, one for sending BAU messages from each of the 32 + * cpu's on the node. + * + * TLB shootdown will use the first of the 8 descriptors of each set. + * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). + */ + +#define UV_ITEMS_PER_DESCRIPTOR 8 +#define UV_CPUS_PER_ACT_STATUS 32 +#define UV_ACT_STATUS_MASK 0x3 +#define UV_ACT_STATUS_SIZE 2 +#define UV_ACTIVATION_DESCRIPTOR_SIZE 32 +#define UV_DISTRIBUTION_SIZE 256 +#define UV_SW_ACK_NPENDING 8 +#define UV_BAU_MESSAGE 200 +/* + * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h + * To be dynamically allocated in the future + */ +#define UV_NET_ENDPOINT_INTD 0x38 +#define UV_DESC_BASE_PNODE_SHIFT 49 +#define UV_PAYLOADQ_PNODE_SHIFT 49 +#define UV_PTC_BASENAME "sgi_uv/ptc_statistics" +#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) + +/* + * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 + */ +#define DESC_STATUS_IDLE 0 +#define DESC_STATUS_ACTIVE 1 +#define DESC_STATUS_DESTINATION_TIMEOUT 2 +#define DESC_STATUS_SOURCE_TIMEOUT 3 + +/* + * source side threshholds at which message retries print a warning + */ +#define SOURCE_TIMEOUT_LIMIT 20 +#define DESTINATION_TIMEOUT_LIMIT 20 + +/* + * number of entries in the destination side payload queue + */ +#define DEST_Q_SIZE 17 +/* + * number of destination side software ack resources + */ +#define DEST_NUM_RESOURCES 8 +#define MAX_CPUS_PER_NODE 32 +/* + * completion statuses for sending a TLB flush message + */ +#define FLUSH_RETRY 1 +#define FLUSH_GIVEUP 2 +#define FLUSH_COMPLETE 3 + +/* + * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) + * If the 'multilevel' flag in the header portion of the descriptor + * has been set to 0, then endpoint multi-unicast mode is selected. + * The distribution specification (32 bytes) is interpreted as a 256-bit + * distribution vector. Adjacent bits correspond to consecutive even numbered + * nodeIDs. The result of adding the index of a given bit to the 15-bit + * 'base_dest_nodeid' field of the header corresponds to the + * destination nodeID associated with that specified bit. + */ +struct bau_target_nodemask { + unsigned long bits[BITS_TO_LONGS(256)]; +}; + +/* + * mask of cpu's on a node + * (during initialization we need to check that unsigned long has + * enough bits for max. cpu's per node) + */ +struct bau_local_cpumask { + unsigned long bits; +}; + +/* + * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor) + * only 12 bytes (96 bits) of the payload area are usable. + * An additional 3 bytes (bits 27:4) of the header address are carried + * to the next bytes of the destination payload queue. + * And an additional 2 bytes of the header Suppl_A field are also + * carried to the destination payload queue. + * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte) + * of the destination payload queue, which is written by the hardware + * with the s/w ack resource bit vector. + * [ effective message contents (16 bytes (128 bits) maximum), not counting + * the s/w ack bit vector ] + */ + +/* + * The payload is software-defined for INTD transactions + */ +struct bau_msg_payload { + unsigned long address; /* signifies a page or all TLB's + of the cpu */ + /* 64 bits */ + unsigned short sending_cpu; /* filled in by sender */ + /* 16 bits */ + unsigned short acknowledge_count;/* filled in by destination */ + /* 16 bits */ + unsigned int reserved1:32; /* not usable */ +}; + + +/* + * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) + * see table 4.2.3.0.1 in broacast_assist spec. + */ +struct bau_msg_header { + int dest_subnodeid:6; /* must be zero */ + /* bits 5:0 */ + int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ + /* bits 20:6 */ + int command:8; /* message type */ + /* bits 28:21 */ + /* 0x38: SN3net EndPoint Message */ + int rsvd_1:3; /* must be zero */ + /* bits 31:29 */ + /* int will align on 32 bits */ + int rsvd_2:9; /* must be zero */ + /* bits 40:32 */ + /* Suppl_A is 56-41 */ + int payload_2a:8; /* becomes byte 16 of msg */ + /* bits 48:41 */ /* not currently using */ + int payload_2b:8; /* becomes byte 17 of msg */ + /* bits 56:49 */ /* not currently using */ + /* Address field (96:57) is never used as an + address (these are address bits 42:3) */ + int rsvd_3:1; /* must be zero */ + /* bit 57 */ + /* address bits 27:4 are payload */ + /* these 24 bits become bytes 12-14 of msg */ + int replied_to:1; /* sent as 0 by the source to byte 12 */ + /* bit 58 */ + + int payload_1a:5; /* not currently used */ + /* bits 63:59 */ + int payload_1b:8; /* not currently used */ + /* bits 71:64 */ + int payload_1c:8; /* not currently used */ + /* bits 79:72 */ + int payload_1d:2; /* not currently used */ + /* bits 81:80 */ + + int rsvd_4:7; /* must be zero */ + /* bits 88:82 */ + int sw_ack_flag:1; /* software acknowledge flag */ + /* bit 89 */ + /* INTD trasactions at destination are to + wait for software acknowledge */ + int rsvd_5:6; /* must be zero */ + /* bits 95:90 */ + int rsvd_6:5; /* must be zero */ + /* bits 100:96 */ + int int_both:1; /* if 1, interrupt both sockets on the blade */ + /* bit 101*/ + int fairness:3; /* usually zero */ + /* bits 104:102 */ + int multilevel:1; /* multi-level multicast format */ + /* bit 105 */ + /* 0 for TLB: endpoint multi-unicast messages */ + int chaining:1; /* next descriptor is part of this activation*/ + /* bit 106 */ + int rsvd_7:21; /* must be zero */ + /* bits 127:107 */ +}; + +/* + * The activation descriptor: + * The format of the message to send, plus all accompanying control + * Should be 64 bytes + */ +struct bau_desc { + struct bau_target_nodemask distribution; + /* + * message template, consisting of header and payload: + */ + struct bau_msg_header header; + struct bau_msg_payload payload; +}; +/* + * -payload-- ---------header------ + * bytes 0-11 bits 41-56 bits 58-81 + * A B (2) C (3) + * + * A/B/C are moved to: + * A C B + * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) + * ------------payload queue----------- + */ + +/* + * The payload queue on the destination side is an array of these. + * With BAU_MISC_CONTROL set for software acknowledge mode, the messages + * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 + * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) + * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from + * sw_ack_vector and payload_2) + * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software + * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload + * operation." + */ +struct bau_payload_queue_entry { + unsigned long address; /* signifies a page or all TLB's + of the cpu */ + /* 64 bits, bytes 0-7 */ + + unsigned short sending_cpu; /* cpu that sent the message */ + /* 16 bits, bytes 8-9 */ + + unsigned short acknowledge_count; /* filled in by destination */ + /* 16 bits, bytes 10-11 */ + + unsigned short replied_to:1; /* sent as 0 by the source */ + /* 1 bit */ + unsigned short unused1:7; /* not currently using */ + /* 7 bits: byte 12) */ + + unsigned char unused2[2]; /* not currently using */ + /* bytes 13-14 */ + + unsigned char sw_ack_vector; /* filled in by the hardware */ + /* byte 15 (bits 127:120) */ + + unsigned char unused4[3]; /* not currently using bytes 17-19 */ + /* bytes 17-19 */ + + int number_of_cpus; /* filled in at destination */ + /* 32 bits, bytes 20-23 (aligned) */ + + unsigned char unused5[8]; /* not using */ + /* bytes 24-31 */ +}; + +/* + * one for every slot in the destination payload queue + */ +struct bau_msg_status { + struct bau_local_cpumask seen_by; /* map of cpu's */ +}; + +/* + * one for every slot in the destination software ack resources + */ +struct bau_sw_ack_status { + struct bau_payload_queue_entry *msg; /* associated message */ + int watcher; /* cpu monitoring, or -1 */ +}; + +/* + * one on every node and per-cpu; to locate the software tables + */ +struct bau_control { + struct bau_desc *descriptor_base; + struct bau_payload_queue_entry *bau_msg_head; + struct bau_payload_queue_entry *va_queue_first; + struct bau_payload_queue_entry *va_queue_last; + struct bau_msg_status *msg_statuses; + int *watching; /* pointer to array */ +}; + +/* + * This structure is allocated per_cpu for UV TLB shootdown statistics. + */ +struct ptc_stats { + unsigned long ptc_i; /* number of IPI-style flushes */ + unsigned long requestor; /* number of nodes this cpu sent to */ + unsigned long requestee; /* times cpu was remotely requested */ + unsigned long alltlb; /* times all tlb's on this cpu were flushed */ + unsigned long onetlb; /* times just one tlb on this cpu was flushed */ + unsigned long s_retry; /* retries on source side timeouts */ + unsigned long d_retry; /* retries on destination side timeouts */ + unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ + unsigned long dflush; /* cycles spent on destination side */ + unsigned long retriesok; /* successes on retries */ + unsigned long nomsg; /* interrupts with no message */ + unsigned long multmsg; /* interrupts with multiple messages */ + unsigned long ntargeted;/* nodes targeted */ +}; + +static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) +{ + return constant_test_bit(node, &dstp->bits[0]); +} +static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) +{ + __set_bit(node, &dstp->bits[0]); +} +static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) +{ + bitmap_zero(&dstp->bits[0], nbits); +} + +static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) +{ + bitmap_zero(&dstp->bits, nbits); +} + +#define cpubit_isset(cpu, bau_local_cpumask) \ + test_bit((cpu), (bau_local_cpumask).bits) + +extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); +extern void uv_bau_message_intr1(void); +extern void uv_bau_timeout_intr1(void); + +#endif /* __ASM_X86_UV_BAU__ */ diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h index 26b9240d1e23..a4ef26e5850b 100644 --- a/include/asm-x86/uv/uv_hub.h +++ b/include/asm-x86/uv/uv_hub.h @@ -5,7 +5,7 @@ * * SGI UV architectural definitions * - * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ #ifndef __ASM_X86_UV_HUB_H__ @@ -20,26 +20,49 @@ /* * Addressing Terminology * - * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of - * routers always have low bit of 1, C/MBricks have low bit - * equal to 0. Most addressing macros that target UV hub chips - * right shift the NASID by 1 to exclude the always-zero bit. + * M - The low M bits of a physical address represent the offset + * into the blade local memory. RAM memory on a blade is physically + * contiguous (although various IO spaces may punch holes in + * it).. * - * SNASID - NASID right shifted by 1 bit. + * N - Number of bits in the node portion of a socket physical + * address. + * + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. + * + * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead + * of nasids. + * + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. + * + * + * NumaLink Global Physical Address Format: + * +--------------------------------+---------------------+ + * |00..000| GNODE | NodeOffset | + * +--------------------------------+---------------------+ + * |<-------53 - M bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) * * * Memory/UV-HUB Processor Socket Address Format: - * +--------+---------------+---------------------+ - * |00..0000| SNASID | NodeOffset | - * +--------+---------------+---------------------+ - * <--- N bits --->|<--------M bits -----> + * +----------------+---------------+---------------------+ + * |00..000000000000| PNODE | NodeOffset | + * +----------------+---------------+---------------------+ + * <--- N bits --->|<--------M bits -----> * - * M number of node offset bits (35 .. 40) - * N number of SNASID bits (0 .. 10) + * M - number of node offset bits (35 .. 40) + * N - number of PNODE bits (0 .. 10) * * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). * The actual values are configuration dependent and are set at - * boot time + * boot time. M & N values are set by the hardware/BIOS at boot. + * * * APICID format * NOTE!!!!!! This is the current format of the APICID. However, code @@ -48,14 +71,14 @@ * * 1111110000000000 * 5432109876543210 - * nnnnnnnnnnlc0cch + * pppppppppplc0cch * sssssssssss * - * n = snasid bits + * p = pnode bits * l = socket number on board * c = core * h = hyperthread - * s = bits that are in the socket CSR + * s = bits that are in the SOCKET_ID CSR * * Note: Processor only supports 12 bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. @@ -74,7 +97,7 @@ * This value is also the value of the maximum number of non-router NASIDs * in the numalink fabric. * - * NOTE: a brick may be 1 or 2 OS nodes. Don't get these confused. + * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. */ #define UV_MAX_NUMALINK_BLADES 16384 @@ -96,8 +119,12 @@ */ struct uv_hub_info_s { unsigned long global_mmr_base; - unsigned short local_nasid; - unsigned short gnode_upper; + unsigned long gpa_mask; + unsigned long gnode_upper; + unsigned long lowmem_remap_top; + unsigned long lowmem_remap_base; + unsigned short pnode; + unsigned short pnode_mask; unsigned short coherency_domain_number; unsigned short numa_blade_id; unsigned char blade_processor_id; @@ -112,83 +139,126 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); * Local & Global MMR space macros. * Note: macros are intended to be used ONLY by inline functions * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) */ -#define UV_SNASID(n) ((n) >> 1) -#define UV_NASID(n) ((n) << 1) +#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) +#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) #define UV_LOCAL_MMR_BASE 0xf4000000UL #define UV_GLOBAL_MMR32_BASE 0xf8000000UL #define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) +#define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024) +#define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) -#define UV_GLOBAL_MMR32_SNASID_MASK 0x3ff -#define UV_GLOBAL_MMR32_SNASID_SHIFT 15 -#define UV_GLOBAL_MMR64_SNASID_SHIFT 26 +#define UV_GLOBAL_MMR32_PNODE_SHIFT 15 +#define UV_GLOBAL_MMR64_PNODE_SHIFT 26 -#define UV_GLOBAL_MMR32_NASID_BITS(n) \ - (((UV_SNASID(n) & UV_GLOBAL_MMR32_SNASID_MASK)) << \ - (UV_GLOBAL_MMR32_SNASID_SHIFT)) +#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) -#define UV_GLOBAL_MMR64_NASID_BITS(n) \ - ((unsigned long)UV_SNASID(n) << UV_GLOBAL_MMR64_SNASID_SHIFT) +#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ + ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) + +#define UV_APIC_PNODE_SHIFT 6 + +/* + * Macros for converting between kernel virtual addresses, socket local physical + * addresses, and UV global physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. + */ + +/* socket phys RAM --> UV global physical address */ +static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) +{ + if (paddr < uv_hub_info->lowmem_remap_top) + paddr += uv_hub_info->lowmem_remap_base; + return paddr | uv_hub_info->gnode_upper; +} + + +/* socket virtual --> UV global physical address */ +static inline unsigned long uv_gpa(void *v) +{ + return __pa(v) | uv_hub_info->gnode_upper; +} + +/* socket virtual --> UV global physical address */ +static inline void *uv_vgpa(void *v) +{ + return (void *)uv_gpa(v); +} + +/* UV global physical address --> socket virtual */ +static inline void *uv_va(unsigned long gpa) +{ + return __va(gpa & uv_hub_info->gpa_mask); +} + +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); +} -#define UV_APIC_NASID_SHIFT 6 /* - * Extract a NASID from an APICID (full apicid, not processor subset) + * Extract a PNODE from an APICID (full apicid, not processor subset) */ -static inline int uv_apicid_to_nasid(int apicid) +static inline int uv_apicid_to_pnode(int apicid) { - return (UV_NASID(apicid >> UV_APIC_NASID_SHIFT)); + return (apicid >> UV_APIC_PNODE_SHIFT); } /* * Access global MMRs using the low memory MMR32 space. This region supports * faster MMR access but not all MMRs are accessible in this space. */ -static inline unsigned long *uv_global_mmr32_address(int nasid, +static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR32_BASE | - UV_GLOBAL_MMR32_NASID_BITS(nasid) | offset); + UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr32(int nasid, unsigned long offset, +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) { - *uv_global_mmr32_address(nasid, offset) = val; + *uv_global_mmr32_address(pnode, offset) = val; } -static inline unsigned long uv_read_global_mmr32(int nasid, +static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) { - return *uv_global_mmr32_address(nasid, offset); + return *uv_global_mmr32_address(pnode, offset); } /* * Access Global MMR space using the MMR space located at the top of physical * memory. */ -static inline unsigned long *uv_global_mmr64_address(int nasid, +static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR64_BASE | - UV_GLOBAL_MMR64_NASID_BITS(nasid) | offset); + UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr64(int nasid, unsigned long offset, +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) { - *uv_global_mmr64_address(nasid, offset) = val; + *uv_global_mmr64_address(pnode, offset) = val; } -static inline unsigned long uv_read_global_mmr64(int nasid, +static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) { - return *uv_global_mmr64_address(nasid, offset); + return *uv_global_mmr64_address(pnode, offset); } /* - * Access node local MMRs. Faster than using global space but only local MMRs + * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. */ static inline unsigned long *uv_local_mmr_address(unsigned long offset) @@ -207,15 +277,15 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) } /* - * Structures and definitions for converting between cpu, node, and blade + * Structures and definitions for converting between cpu, node, pnode, and blade * numbers. */ struct uv_blade_info { - unsigned short nr_posible_cpus; + unsigned short nr_possible_cpus; unsigned short nr_online_cpus; - unsigned short nasid; + unsigned short pnode; }; -struct uv_blade_info *uv_blade_info; +extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; extern short *uv_cpu_to_blade; extern short uv_possible_blades; @@ -244,16 +314,16 @@ static inline int uv_node_to_blade_id(int nid) return uv_node_to_blade[nid]; } -/* Convert a blade id to the NASID of the blade */ -static inline int uv_blade_to_nasid(int bid) +/* Convert a blade id to the PNODE of the blade */ +static inline int uv_blade_to_pnode(int bid) { - return uv_blade_info[bid].nasid; + return uv_blade_info[bid].pnode; } /* Determine the number of possible cpus on a blade */ static inline int uv_blade_nr_possible_cpus(int bid) { - return uv_blade_info[bid].nr_posible_cpus; + return uv_blade_info[bid].nr_possible_cpus; } /* Determine the number of online cpus on a blade */ @@ -262,16 +332,16 @@ static inline int uv_blade_nr_online_cpus(int bid) return uv_blade_info[bid].nr_online_cpus; } -/* Convert a cpu id to the NASID of the blade containing the cpu */ -static inline int uv_cpu_to_nasid(int cpu) +/* Convert a cpu id to the PNODE of the blade containing the cpu */ +static inline int uv_cpu_to_pnode(int cpu) { - return uv_blade_info[uv_cpu_to_blade_id(cpu)].nasid; + return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode; } -/* Convert a node number to the NASID of the blade */ -static inline int uv_node_to_nasid(int nid) +/* Convert a linux node number to the PNODE of the blade */ +static inline int uv_node_to_pnode(int nid) { - return uv_blade_info[uv_node_to_blade_id(nid)].nasid; + return uv_blade_info[uv_node_to_blade_id(nid)].pnode; } /* Maximum possible number of blades */ diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h index 3b69fe6b6376..151fd7fcb809 100644 --- a/include/asm-x86/uv/uv_mmrs.h +++ b/include/asm-x86/uv/uv_mmrs.h @@ -11,17 +11,290 @@ #ifndef __ASM_X86_UV_MMRS__ #define __ASM_X86_UV_MMRS__ -/* - * AUTO GENERATED - Do not edit - */ +#define UV_MMR_ENABLE (1UL << 63) + +/* ========================================================================= */ +/* UVH_BAU_DATA_CONFIG */ +/* ========================================================================= */ +#define UVH_BAU_DATA_CONFIG 0x61680UL +#define UVH_BAU_DATA_CONFIG_32 0x0438 + +#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 +#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_BAU_DATA_CONFIG_P_SHFT 13 +#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_BAU_DATA_CONFIG_T_SHFT 15 +#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_BAU_DATA_CONFIG_M_SHFT 16 +#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_bau_data_config_u { + unsigned long v; + struct uvh_bau_data_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0 0x70000UL +#define UVH_EVENT_OCCURRED0_32 0x005e8 + +#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 +#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 +#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3 +#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4 +#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5 +#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6 +#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 +#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 +#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 +#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 +#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 +#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 +#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 +#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 +#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 +#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43 +#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL +#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45 +#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL +#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 +#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL +#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51 +#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL +#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52 +#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL +#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53 +#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL +#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54 +#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL +#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55 +#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +union uvh_event_occurred0_u { + unsigned long v; + struct uvh_event_occurred0_s { + unsigned long lb_hcerr : 1; /* RW, W1C */ + unsigned long gr0_hcerr : 1; /* RW, W1C */ + unsigned long gr1_hcerr : 1; /* RW, W1C */ + unsigned long lh_hcerr : 1; /* RW, W1C */ + unsigned long rh_hcerr : 1; /* RW, W1C */ + unsigned long xn_hcerr : 1; /* RW, W1C */ + unsigned long si_hcerr : 1; /* RW, W1C */ + unsigned long lb_aoerr0 : 1; /* RW, W1C */ + unsigned long gr0_aoerr0 : 1; /* RW, W1C */ + unsigned long gr1_aoerr0 : 1; /* RW, W1C */ + unsigned long lh_aoerr0 : 1; /* RW, W1C */ + unsigned long rh_aoerr0 : 1; /* RW, W1C */ + unsigned long xn_aoerr0 : 1; /* RW, W1C */ + unsigned long si_aoerr0 : 1; /* RW, W1C */ + unsigned long lb_aoerr1 : 1; /* RW, W1C */ + unsigned long gr0_aoerr1 : 1; /* RW, W1C */ + unsigned long gr1_aoerr1 : 1; /* RW, W1C */ + unsigned long lh_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_aoerr1 : 1; /* RW, W1C */ + unsigned long xn_aoerr1 : 1; /* RW, W1C */ + unsigned long si_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_vpi_int : 1; /* RW, W1C */ + unsigned long system_shutdown_int : 1; /* RW, W1C */ + unsigned long lb_irq_int_0 : 1; /* RW, W1C */ + unsigned long lb_irq_int_1 : 1; /* RW, W1C */ + unsigned long lb_irq_int_2 : 1; /* RW, W1C */ + unsigned long lb_irq_int_3 : 1; /* RW, W1C */ + unsigned long lb_irq_int_4 : 1; /* RW, W1C */ + unsigned long lb_irq_int_5 : 1; /* RW, W1C */ + unsigned long lb_irq_int_6 : 1; /* RW, W1C */ + unsigned long lb_irq_int_7 : 1; /* RW, W1C */ + unsigned long lb_irq_int_8 : 1; /* RW, W1C */ + unsigned long lb_irq_int_9 : 1; /* RW, W1C */ + unsigned long lb_irq_int_10 : 1; /* RW, W1C */ + unsigned long lb_irq_int_11 : 1; /* RW, W1C */ + unsigned long lb_irq_int_12 : 1; /* RW, W1C */ + unsigned long lb_irq_int_13 : 1; /* RW, W1C */ + unsigned long lb_irq_int_14 : 1; /* RW, W1C */ + unsigned long lb_irq_int_15 : 1; /* RW, W1C */ + unsigned long l1_nmi_int : 1; /* RW, W1C */ + unsigned long stop_clock : 1; /* RW, W1C */ + unsigned long asic_to_l1 : 1; /* RW, W1C */ + unsigned long l1_to_asic : 1; /* RW, W1C */ + unsigned long ltc_int : 1; /* RW, W1C */ + unsigned long la_seq_trigger : 1; /* RW, W1C */ + unsigned long ipi_int : 1; /* RW, W1C */ + unsigned long extio_int0 : 1; /* RW, W1C */ + unsigned long extio_int1 : 1; /* RW, W1C */ + unsigned long extio_int2 : 1; /* RW, W1C */ + unsigned long extio_int3 : 1; /* RW, W1C */ + unsigned long profile_int : 1; /* RW, W1C */ + unsigned long rtc0 : 1; /* RW, W1C */ + unsigned long rtc1 : 1; /* RW, W1C */ + unsigned long rtc2 : 1; /* RW, W1C */ + unsigned long rtc3 : 1; /* RW, W1C */ + unsigned long bau_data : 1; /* RW, W1C */ + unsigned long power_management_req : 1; /* RW, W1C */ + unsigned long rsvd_57_63 : 7; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0_ALIAS */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL +#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 + +/* ========================================================================= */ +/* UVH_INT_CMPB */ +/* ========================================================================= */ +#define UVH_INT_CMPB 0x22080UL + +#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpb_u { + unsigned long v; + struct uvh_int_cmpb_s { + unsigned long real_time_cmpb : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPC */ +/* ========================================================================= */ +#define UVH_INT_CMPC 0x22100UL + +#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpc_u { + unsigned long v; + struct uvh_int_cmpc_s { + unsigned long real_time_cmpc : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPD */ +/* ========================================================================= */ +#define UVH_INT_CMPD 0x22180UL - #define UV_MMR_ENABLE (1UL << 63) +#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpd_u { + unsigned long v; + struct uvh_int_cmpd_s { + unsigned long real_time_cmpd : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; /* ========================================================================= */ /* UVH_IPI_INT */ /* ========================================================================= */ #define UVH_IPI_INT 0x60500UL -#define UVH_IPI_INT_32 0x0360 +#define UVH_IPI_INT_32 0x0348 #define UVH_IPI_INT_VECTOR_SHFT 0 #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL @@ -51,7 +324,7 @@ union uvh_ipi_int_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009f0 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009c0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL @@ -73,7 +346,7 @@ union uvh_lb_bau_intd_payload_queue_first_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009f8 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009c8 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL @@ -91,7 +364,7 @@ union uvh_lb_bau_intd_payload_queue_last_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x00a00 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x009d0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL @@ -109,6 +382,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0a68 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL @@ -169,12 +443,13 @@ union uvh_lb_bau_intd_software_acknowledge_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009d8 +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009a8 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL @@ -197,7 +472,7 @@ union uvh_lb_bau_sb_activation_control_u { /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009e0 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009b0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL @@ -213,7 +488,7 @@ union uvh_lb_bau_sb_activation_status_0_u { /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009e8 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009b8 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL @@ -229,7 +504,7 @@ union uvh_lb_bau_sb_activation_status_1_u { /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ /* ========================================================================= */ #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009d0 +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009a0 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL @@ -248,6 +523,334 @@ union uvh_lb_bau_sb_descriptor_base_u { }; /* ========================================================================= */ +/* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ +/* ========================================================================= */ +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL + +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL + +union uvh_lb_mcast_aoerr0_rpt_enable_u { + unsigned long v; + struct uvh_lb_mcast_aoerr0_rpt_enable_s { + unsigned long mcast_obese_msg : 1; /* RW */ + unsigned long mcast_data_sb_err : 1; /* RW */ + unsigned long mcast_nack_buff_parity : 1; /* RW */ + unsigned long mcast_timeout : 1; /* RW */ + unsigned long mcast_inactive_reply : 1; /* RW */ + unsigned long mcast_upgrade_error : 1; /* RW */ + unsigned long mcast_reg_count_underflow : 1; /* RW */ + unsigned long mcast_rep_obese_msg : 1; /* RW */ + unsigned long ucache_req_runt_msg : 1; /* RW */ + unsigned long ucache_req_obese_msg : 1; /* RW */ + unsigned long ucache_req_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_runt_msg : 1; /* RW */ + unsigned long ucache_rep_obese_msg : 1; /* RW */ + unsigned long ucache_rep_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_command_err : 1; /* RW */ + unsigned long ucache_pend_timeout : 1; /* RW */ + unsigned long macc_req_runt_msg : 1; /* RW */ + unsigned long macc_req_obese_msg : 1; /* RW */ + unsigned long macc_req_data_sb_err : 1; /* RW */ + unsigned long macc_rep_runt_msg : 1; /* RW */ + unsigned long macc_rep_obese_msg : 1; /* RW */ + unsigned long macc_rep_data_sb_err : 1; /* RW */ + unsigned long macc_amo_timeout : 1; /* RW */ + unsigned long macc_put_timeout : 1; /* RW */ + unsigned long macc_spurious_event : 1; /* RW */ + unsigned long ioh_destination_table_parity : 1; /* RW */ + unsigned long get_had_error_reply : 1; /* RW */ + unsigned long get_timeout : 1; /* RW */ + unsigned long lock_manager_had_error_reply : 1; /* RW */ + unsigned long put_had_error_reply : 1; /* RW */ + unsigned long put_timeout : 1; /* RW */ + unsigned long sb_activation_overrun : 1; /* RW */ + unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ + unsigned long completed_gb_activation_timeout : 1; /* RW */ + unsigned long descriptor_buffer_0_parity : 1; /* RW */ + unsigned long descriptor_buffer_1_parity : 1; /* RW */ + unsigned long socket_destination_table_parity : 1; /* RW */ + unsigned long bau_reply_payload_corruption : 1; /* RW */ + unsigned long io_port_destination_table_parity : 1; /* RW */ + unsigned long intd_soft_ack_timeout : 1; /* RW */ + unsigned long int_rep_obese_msg : 1; /* RW */ + unsigned long int_rep_command_err : 1; /* RW */ + unsigned long int_timeout : 1; /* RW */ + unsigned long rsvd_43_63 : 21; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_CONFIG 0x61000UL + +#define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 +#define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 +#define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 +#define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 +#define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_local_int0_config_u { + unsigned long v; + struct uvh_local_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_ENABLE */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_ENABLE 0x65000UL + +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL + +union uvh_local_int0_enable_u { + unsigned long v; + struct uvh_local_int0_enable_s { + unsigned long lb_hcerr : 1; /* RW */ + unsigned long gr0_hcerr : 1; /* RW */ + unsigned long gr1_hcerr : 1; /* RW */ + unsigned long lh_hcerr : 1; /* RW */ + unsigned long rh_hcerr : 1; /* RW */ + unsigned long xn_hcerr : 1; /* RW */ + unsigned long si_hcerr : 1; /* RW */ + unsigned long lb_aoerr0 : 1; /* RW */ + unsigned long gr0_aoerr0 : 1; /* RW */ + unsigned long gr1_aoerr0 : 1; /* RW */ + unsigned long lh_aoerr0 : 1; /* RW */ + unsigned long rh_aoerr0 : 1; /* RW */ + unsigned long xn_aoerr0 : 1; /* RW */ + unsigned long si_aoerr0 : 1; /* RW */ + unsigned long lb_aoerr1 : 1; /* RW */ + unsigned long gr0_aoerr1 : 1; /* RW */ + unsigned long gr1_aoerr1 : 1; /* RW */ + unsigned long lh_aoerr1 : 1; /* RW */ + unsigned long rh_aoerr1 : 1; /* RW */ + unsigned long xn_aoerr1 : 1; /* RW */ + unsigned long si_aoerr1 : 1; /* RW */ + unsigned long rh_vpi_int : 1; /* RW */ + unsigned long system_shutdown_int : 1; /* RW */ + unsigned long lb_irq_int_0 : 1; /* RW */ + unsigned long lb_irq_int_1 : 1; /* RW */ + unsigned long lb_irq_int_2 : 1; /* RW */ + unsigned long lb_irq_int_3 : 1; /* RW */ + unsigned long lb_irq_int_4 : 1; /* RW */ + unsigned long lb_irq_int_5 : 1; /* RW */ + unsigned long lb_irq_int_6 : 1; /* RW */ + unsigned long lb_irq_int_7 : 1; /* RW */ + unsigned long lb_irq_int_8 : 1; /* RW */ + unsigned long lb_irq_int_9 : 1; /* RW */ + unsigned long lb_irq_int_10 : 1; /* RW */ + unsigned long lb_irq_int_11 : 1; /* RW */ + unsigned long lb_irq_int_12 : 1; /* RW */ + unsigned long lb_irq_int_13 : 1; /* RW */ + unsigned long lb_irq_int_14 : 1; /* RW */ + unsigned long lb_irq_int_15 : 1; /* RW */ + unsigned long l1_nmi_int : 1; /* RW */ + unsigned long stop_clock : 1; /* RW */ + unsigned long asic_to_l1 : 1; /* RW */ + unsigned long l1_to_asic : 1; /* RW */ + unsigned long ltc_int : 1; /* RW */ + unsigned long la_seq_trigger : 1; /* RW */ + unsigned long rsvd_45_63 : 19; /* */ + } s; +}; + +/* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ #define UVH_NODE_ID 0x0UL @@ -284,14 +887,101 @@ union uvh_node_id_u { }; /* ========================================================================= */ +/* UVH_NODE_PRESENT_TABLE */ +/* ========================================================================= */ +#define UVH_NODE_PRESENT_TABLE 0x1400UL +#define UVH_NODE_PRESENT_TABLE_DEPTH 16 + +#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + +union uvh_node_present_table_u { + unsigned long v; + struct uvh_node_present_table_s { + unsigned long nodes : 64; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_0_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_1_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_2_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL + +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_cfg_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_cfg_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long rsvd_46_62: 17; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 46 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0000400000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 @@ -302,8 +992,9 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { struct uvh_rh_gam_gru_overlay_config_mmr_s { unsigned long rsvd_0_27: 28; /* */ unsigned long base : 18; /* RW */ + unsigned long rsvd_46_47: 2; /* */ unsigned long gr4 : 1; /* RW */ - unsigned long rsvd_47_51: 5; /* */ + unsigned long rsvd_49_51: 3; /* */ unsigned long n_gru : 4; /* RW */ unsigned long rsvd_56_62: 7; /* */ unsigned long enable : 1; /* RW */ @@ -311,6 +1002,32 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { }; /* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_mmioh_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_mmioh_overlay_config_mmr_s { + unsigned long rsvd_0_29: 30; /* */ + unsigned long base : 16; /* RW */ + unsigned long m_io : 6; /* RW */ + unsigned long n_io : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ /* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL @@ -336,7 +1053,7 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u { /* ========================================================================= */ /* UVH_RTC */ /* ========================================================================= */ -#define UVH_RTC 0x28000UL +#define UVH_RTC 0x340000UL #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL @@ -350,6 +1067,139 @@ union uvh_rtc_u { }; /* ========================================================================= */ +/* UVH_RTC1_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC1_INT_CONFIG 0x615c0UL + +#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC1_INT_CONFIG_P_SHFT 13 +#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC1_INT_CONFIG_T_SHFT 15 +#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC1_INT_CONFIG_M_SHFT 16 +#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc1_int_config_u { + unsigned long v; + struct uvh_rtc1_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC2_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC2_INT_CONFIG 0x61600UL + +#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC2_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC2_INT_CONFIG_P_SHFT 13 +#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC2_INT_CONFIG_T_SHFT 15 +#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC2_INT_CONFIG_M_SHFT 16 +#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc2_int_config_u { + unsigned long v; + struct uvh_rtc2_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC3_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC3_INT_CONFIG 0x61640UL + +#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC3_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC3_INT_CONFIG_P_SHFT 13 +#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC3_INT_CONFIG_T_SHFT 15 +#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC3_INT_CONFIG_M_SHFT 16 +#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc3_int_config_u { + unsigned long v; + struct uvh_rtc3_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC_INC_RATIO */ +/* ========================================================================= */ +#define UVH_RTC_INC_RATIO 0x350000UL + +#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 +#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL +#define UVH_RTC_INC_RATIO_RATIO_SHFT 20 +#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL + +union uvh_rtc_inc_ratio_u { + unsigned long v; + struct uvh_rtc_inc_ratio_s { + unsigned long fraction : 20; /* RW */ + unsigned long ratio : 3; /* RW */ + unsigned long rsvd_23_63: 41; /* */ + } s; +}; + +/* ========================================================================= */ /* UVH_SI_ADDR_MAP_CONFIG */ /* ========================================================================= */ #define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL @@ -369,5 +1219,77 @@ union uvh_si_addr_map_config_u { } s; }; +/* ========================================================================= */ +/* UVH_SI_ALIAS0_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL + +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias0_overlay_config_u { + unsigned long v; + struct uvh_si_alias0_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS1_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL + +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias1_overlay_config_u { + unsigned long v; + struct uvh_si_alias1_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS2_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL + +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias2_overlay_config_u { + unsigned long v; + struct uvh_si_alias2_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + #endif /* __ASM_X86_UV_MMRS__ */ diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h index 074b357146df..5ce351325e01 100644 --- a/include/asm-x86/vm86.h +++ b/include/asm-x86/vm86.h @@ -14,12 +14,6 @@ #include <asm/processor-flags.h> -#ifdef CONFIG_VM86 -#define X86_VM_MASK X86_EFLAGS_VM -#else -#define X86_VM_MASK 0 /* No VM86 support */ -#endif - #define BIOSSEG 0x0f000 #define CPU_086 0 @@ -121,7 +115,6 @@ struct vm86plus_info_struct { unsigned long is_vm86pus:1; /* for vm86 internal use */ unsigned char vm86dbg_intxxtab[32]; /* for debugger */ }; - struct vm86plus_struct { struct vm86_regs regs; unsigned long flags; @@ -133,6 +126,9 @@ struct vm86plus_struct { }; #ifdef __KERNEL__ + +#include <asm/ptrace.h> + /* * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 * mode - the main change is that the old segment descriptors aren't @@ -141,7 +137,6 @@ struct vm86plus_struct { * at the end of the structure. Look at ptrace.h to see the "normal" * setup. For user space layout see 'struct vm86_regs' above. */ -#include <asm/ptrace.h> struct kernel_vm86_regs { /* diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h index 478188130328..c3118c385156 100644 --- a/include/asm-x86/vmi_time.h +++ b/include/asm-x86/vmi_time.h @@ -50,7 +50,7 @@ extern void __init vmi_time_init(void); extern unsigned long vmi_get_wallclock(void); extern int vmi_set_wallclock(unsigned long now); extern unsigned long long vmi_sched_clock(void); -extern unsigned long vmi_cpu_khz(void); +extern unsigned long vmi_tsc_khz(void); #ifdef CONFIG_X86_LOCAL_APIC extern void __devinit vmi_time_bsp_init(void); diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index c2ccd997ed35..2a4f9b41d684 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -176,9 +176,9 @@ HYPERVISOR_fpu_taskswitch(int set) } static inline int -HYPERVISOR_sched_op(int cmd, unsigned long arg) +HYPERVISOR_sched_op(int cmd, void *arg) { - return _hypercall2(int, sched_op, cmd, arg); + return _hypercall2(int, sched_op_new, cmd, arg); } static inline long @@ -315,6 +315,13 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) } static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; +} + +static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, pte_t new_val, unsigned long flags) { diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index e11f24038b1d..377c04591c15 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -26,15 +26,20 @@ typedef struct xpaddr { #define FOREIGN_FRAME_BIT (1UL<<31) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) -extern unsigned long *phys_to_machine_mapping; +/* Maximum amount of memory we can handle in a domain in pages */ +#define MAX_DOMAIN_PAGES \ + ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) + + +extern unsigned long get_phys_to_machine(unsigned long pfn); +extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); static inline unsigned long pfn_to_mfn(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) @@ -42,7 +47,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; - return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); + return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; } static inline unsigned long mfn_to_pfn(unsigned long mfn) @@ -106,20 +111,12 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) unsigned long pfn = mfn_to_pfn(mfn); if ((pfn < max_mapnr) && !xen_feature(XENFEAT_auto_translated_physmap) - && (phys_to_machine_mapping[pfn] != mfn)) + && (get_phys_to_machine(pfn) != mfn)) return max_mapnr; /* force !pfn_valid() */ + /* XXX fixme; not true with sparsemem */ return pfn; } -static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } - phys_to_machine_mapping[pfn] = mfn; -} - /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) diff --git a/include/asm-x86/xor_32.h b/include/asm-x86/xor_32.h index 067b5c1835a3..921b45840449 100644 --- a/include/asm-x86/xor_32.h +++ b/include/asm-x86/xor_32.h @@ -1,3 +1,6 @@ +#ifndef ASM_X86__XOR_32_H +#define ASM_X86__XOR_32_H + /* * Optimized RAID-5 checksumming functions for MMX and SSE. * @@ -881,3 +884,5 @@ do { \ deals with a load to a line that is being prefetched. */ #define XOR_SELECT_TEMPLATE(FASTEST) \ (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) + +#endif /* ASM_X86__XOR_32_H */ diff --git a/include/asm-x86/xor_64.h b/include/asm-x86/xor_64.h index 24957e39ac8a..2d3a18de295b 100644 --- a/include/asm-x86/xor_64.h +++ b/include/asm-x86/xor_64.h @@ -1,3 +1,6 @@ +#ifndef ASM_X86__XOR_64_H +#define ASM_X86__XOR_64_H + /* * Optimized RAID-5 checksumming functions for MMX and SSE. * @@ -354,3 +357,5 @@ do { \ We may also be able to load into the L1 only depending on how the cpu deals with a load to a line that is being prefetched. */ #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse) + +#endif /* ASM_X86__XOR_64_H */ diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b6fbb2573e88..71d70d1fbce2 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -166,7 +166,7 @@ unifdef-y += acct.h unifdef-y += adb.h unifdef-y += adfs_fs.h unifdef-y += agpgart.h -ifeq ($(wildcard include/asm-$(SRCARCH)/a.out.h),include/asm-$(SRCARCH)/a.out.h) +ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),) unifdef-y += a.out.h endif unifdef-y += apm_bios.h diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 41f7ce7edd7a..0601075d09a1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -82,6 +82,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); +int acpi_mps_check (void); int acpi_numa_init (void); int acpi_table_init (void); @@ -250,6 +251,11 @@ static inline int acpi_boot_table_init(void) return 0; } +static inline int acpi_mps_check(void) +{ + return 0; +} + static inline int acpi_check_resource_conflict(struct resource *res) { return 0; diff --git a/include/linux/audit.h b/include/linux/audit.h index 63c3bb98558f..8b82974bdc12 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -571,7 +571,7 @@ extern void audit_log_lost(const char *message); extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ -extern int audit_filter_user(struct netlink_skb_parms *cb, int type); +extern int audit_filter_user(struct netlink_skb_parms *cb); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int uid, int seq, void *data, size_t datasz, uid_t loginuid, diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 686895bacd9d..a1d9b79078ea 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -84,6 +84,8 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ +extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, + int flags); extern unsigned long free_all_bootmem(void); extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern void *__alloc_bootmem_node(pg_data_t *pgdat, diff --git a/include/linux/capability.h b/include/linux/capability.h index fa830f8de032..02673846d205 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -501,6 +501,8 @@ extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_full_set; extern const kernel_cap_t __cap_init_eff_set; +kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); + int capable(int cap); int __capable(struct task_struct *t, int cap); diff --git a/include/linux/cfag12864b.h b/include/linux/cfag12864b.h index 1605dd8aa646..6f9f19d66591 100644 --- a/include/linux/cfag12864b.h +++ b/include/linux/cfag12864b.h @@ -4,7 +4,7 @@ * Description: cfag12864b LCD driver header * License: GPLv2 * - * Author: Copyright (C) Miguel Ojeda Sandonis <[email protected]> + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-12 * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/console.h b/include/linux/console.h index a4f27fbdf549..248e6e3b9b73 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -108,6 +108,8 @@ struct console { struct console *next; }; +extern int console_set_on_cmdline; + extern int add_preferred_console(char *name, int idx, char *options); extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5df3db58fcc6..c24875bd9c5b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -353,6 +353,10 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #endif /* NR_CPUS */ +#define next_cpu_nr(n, src) next_cpu(n, src) +#define cpus_weight_nr(cpumask) cpus_weight(cpumask) +#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) + /* * The following particular system cpumasks and operations manage * possible, present and online cpus. Each of them is a fixed size diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c80..d982eb89c77d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -300,7 +300,7 @@ extern int d_validate(struct dentry *, struct dentry *); extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); -extern char *d_path(struct path *, char *, int); +extern char *d_path(const struct path *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index f4a5871767f5..4aaa4afb1cb9 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -1,6 +1,8 @@ #ifndef __LINUX_DEBUG_LOCKING_H #define __LINUX_DEBUG_LOCKING_H +#include <linux/kernel.h> + struct task_struct; extern int debug_locks; @@ -11,14 +13,6 @@ extern int debug_locks_silent; */ extern int debug_locks_off(void); -/* - * In the debug case we carry the caller's instruction pointer into - * other functions, but we dont want the function argument overhead - * in the nondebug case - hence these macros: - */ -#define _RET_IP_ (unsigned long)__builtin_return_address(0) -#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) - #define DEBUG_LOCKS_WARN_ON(c) \ ({ \ int __ret = 0; \ diff --git a/include/linux/delay.h b/include/linux/delay.h index 54552d21296e..fd832c6d419e 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif +extern unsigned long lpj_fine; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); diff --git a/include/linux/efi.h b/include/linux/efi.h index a5f359a7ad0e..807373d467f7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out) extern void efi_init (void); extern void *efi_get_pal_addr (void); extern void efi_map_pal_code (void); -extern void efi_map_memmap(void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ @@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); -extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, - u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); -extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h new file mode 100644 index 000000000000..acbdbcc16051 --- /dev/null +++ b/include/linux/firmware-map.h @@ -0,0 +1,74 @@ +/* + * include/linux/firmware-map.h: + * Copyright (C) 2008 SUSE LINUX Products GmbH + * by Bernhard Walle <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License v2.0 as published by + * the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef _LINUX_FIRMWARE_MAP_H +#define _LINUX_FIRMWARE_MAP_H + +#include <linux/list.h> +#include <linux/kobject.h> + +/* + * provide a dummy interface if CONFIG_FIRMWARE_MEMMAP is disabled + */ +#ifdef CONFIG_FIRMWARE_MEMMAP + +/** + * Adds a firmware mapping entry. This function uses kmalloc() for memory + * allocation. Use firmware_map_add_early() if you want to use the bootmem + * allocator. + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. + */ +int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type); + +/** + * Adds a firmware mapping entry. This function uses the bootmem allocator + * for memory allocation. Use firmware_map_add() if you want to use kmalloc(). + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. + */ +int firmware_map_add_early(resource_size_t start, resource_size_t end, + const char *type); + +#else /* CONFIG_FIRMWARE_MEMMAP */ + +static inline int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type) +{ + return 0; +} + +static inline int firmware_map_add_early(resource_size_t start, + resource_size_t end, const char *type) +{ + return 0; +} + +#endif /* CONFIG_FIRMWARE_MEMMAP */ + +#endif /* _LINUX_FIRMWARE_MAP_H */ diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 4d10c7328d2d..6c7eff2ebada 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -13,7 +13,7 @@ struct firmware { struct device; -#if defined(CONFIG_FW_LOADER) || defined(CONFIG_FW_LOADER_MODULE) +#if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) int request_firmware(const struct firmware **fw, const char *name, struct device *device); int request_firmware_nowait( diff --git a/include/linux/fs.h b/include/linux/fs.h index d490779f18d9..d8e2762ed14d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -83,6 +83,7 @@ extern int dir_notify_enable; #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) +#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 @@ -894,8 +895,6 @@ static inline int file_check_writeable(struct file *filp) typedef struct files_struct *fl_owner_t; struct file_lock_operations { - void (*fl_insert)(struct file_lock *); /* lock insertion callback */ - void (*fl_remove)(struct file_lock *); /* lock removal callback */ void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); }; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fb9af6a0fe9c..8dc730132192 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -171,7 +171,7 @@ struct i2c_client { struct i2c_adapter *adapter; /* the adapter we sit on */ struct i2c_driver *driver; /* and our access routines */ struct device dev; /* the device structure */ - int irq; /* irq issued by device (or -1) */ + int irq; /* irq issued by device */ struct list_head list; /* DEPRECATED */ struct completion released; }; diff --git a/include/linux/inet_lro.h b/include/linux/inet_lro.h index 80335b7d77c5..c4335faebb63 100644 --- a/include/linux/inet_lro.h +++ b/include/linux/inet_lro.h @@ -84,7 +84,11 @@ struct net_lro_mgr { from received packets and eth protocol is still ETH_P_8021Q */ - u32 ip_summed; /* Set in non generated SKBs in page mode */ + /* + * Set for generated SKBs that are not added to + * the frag list in fragmented mode + */ + u32 ip_summed; u32 ip_summed_aggr; /* Set in aggregated SKBs: CHECKSUM_UNNECESSARY * or CHECKSUM_NONE */ diff --git a/include/linux/input.h b/include/linux/input.h index e075c4b762fb..d150c57e5f0a 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -534,8 +534,8 @@ struct input_absinfo { #define KEY_FRAMEBACK 0x1b4 /* Consumer - transport controls */ #define KEY_FRAMEFORWARD 0x1b5 - #define KEY_CONTEXT_MENU 0x1b6 /* GenDesc - system context menu */ +#define KEY_MEDIA_REPEAT 0x1b7 /* Consumer - transport control */ #define KEY_DEL_EOL 0x1c0 #define KEY_DEL_EOS 0x1c1 diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 792bf0aa779b..2e70006c7fa8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -46,6 +46,9 @@ extern const char linux_proc_banner[]; #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define _RET_IP_ (unsigned long)__builtin_return_address(0) +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) + #ifdef CONFIG_LBD # include <asm/div64.h> # define sector_div(a, b) do_div(a, b) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index e8ffce898bf9..cf9f40a91c9c 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -1,11 +1,11 @@ #ifndef _LINUX_KERNEL_STAT_H #define _LINUX_KERNEL_STAT_H -#include <asm/irq.h> #include <linux/smp.h> #include <linux/threads.h> #include <linux/percpu.h> #include <linux/cpumask.h> +#include <asm/irq.h> #include <asm/cputime.h> /* diff --git a/include/linux/ks0108.h b/include/linux/ks0108.h index a2c54acceb4e..cb311798e0bc 100644 --- a/include/linux/ks0108.h +++ b/include/linux/ks0108.h @@ -4,7 +4,7 @@ * Description: ks0108 LCD Controller driver header * License: GPLv2 * - * Author: Copyright (C) Miguel Ojeda Sandonis <[email protected]> + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-31 * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f8..9fd1f859021b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -1,6 +1,7 @@ #ifndef _LINUX_LINKAGE_H #define _LINUX_LINKAGE_H +#include <linux/compiler.h> #include <asm/linkage.h> #ifdef __cplusplus @@ -17,6 +18,9 @@ # define asmregparm #endif +#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) + /* * This is used by architectures to keep arguments on the stack * untouched by the compiler by keeping them live until the end. diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943cab01..cf1cd3a2ed78 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -998,8 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, - unsigned long new_end_pfn); +extern void remove_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); @@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); +typedef int (*work_fn_t)(unsigned long, unsigned long, void *); +extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID extern int early_pfn_to_nid(unsigned long pfn); @@ -1024,6 +1026,7 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +extern int after_bootmem; #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f27fd2009334..25f87102ab66 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -88,6 +88,8 @@ struct wireless_dev; #define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ #define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ +#ifdef __KERNEL__ + /* * Compute the worst case header length according to the protocols * used. @@ -114,6 +116,8 @@ struct wireless_dev; #define MAX_HEADER (LL_MAX_HEADER + 48) #endif +#endif /* __KERNEL__ */ + struct net_device_subqueue { /* Give a control state for each queue. This struct may contain diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f31debfac926..0d2a4e7012aa 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e875905f7b12..e8c06122be36 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,13 +25,11 @@ #include <linux/types.h> -/* Macro to aid the definition of ranges of bits */ -#define PB_range(name, required_bits) \ - name, name ## _end = (name + required_bits) - 1 - /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ + PB_migrate, + PB_migrate_end = PB_migrate + 3 - 1, + /* 3 bits required for migrate types */ NR_PAGEBLOCK_BITS }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index eafc9d6d2b35..65953822c9cb 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1520,6 +1520,7 @@ #define PCI_DEVICE_ID_MARVELL_GT64260 0x6430 #define PCI_DEVICE_ID_MARVELL_MV64360 0x6460 #define PCI_DEVICE_ID_MARVELL_MV64460 0x6480 +#define PCI_DEVICE_ID_MARVELL_CAFE_SD 0x4101 #define PCI_VENDOR_ID_V3 0x11b0 #define PCI_DEVICE_ID_V3_V960 0x0001 diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 000000000000..bde4586f4382 --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#include <linux/list.h> + +#endif /* _LINUX_RCULIST_H */ diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index f3f4f28c6960..c9ba2fdf807d 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -8,7 +8,7 @@ extern int pm_trace_enabled; struct device; extern void set_trace_device(struct device *); -extern void generate_resume_trace(void *tracedata, unsigned int user); +extern void generate_resume_trace(const void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ diff --git a/include/linux/securebits.h b/include/linux/securebits.h index c1f19dbceb05..92f09bdf1175 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -7,14 +7,15 @@ inheritance of root-permissions and suid-root executable under compatibility mode. We raise the effective and inheritable bitmasks *of the executable file* if the effective uid of the new process is - 0. If the real uid is 0, we raise the inheritable bitmask of the + 0. If the real uid is 0, we raise the effective (legacy) bit of the executable file. */ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ -/* When set, setuid to/from uid 0 does not trigger capability-"fixes" - to be compatible with old programs relying on set*uid to loose - privileges. When unset, setuid doesn't change privileges. */ +/* When set, setuid to/from uid 0 does not trigger capability-"fixup". + When unset, to provide compatiblility with old programs relying on + set*uid to gain/lose privilege, transitions to/from uid 0 cause + capabilities to be gained/lost. */ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ @@ -26,10 +27,10 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specify +/* Each securesetting is implemented using two bits. One bit specifies whether the setting is on or off. The other bit specify whether the - setting is fixed or not. A setting which is fixed cannot be changed - from user-level. */ + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ #define issecure_mask(X) (1 << (X)) #define issecure(X) (issecure_mask(X) & current->securebits) diff --git a/include/linux/slab.h b/include/linux/slab.h index c2ad35016599..9aa90a6f20e0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -1,7 +1,7 @@ /* * Written by Mark Hemment, 1996 ([email protected]). * - * (C) SGI 2006, Christoph Lameter <[email protected]> + * (C) SGI 2006, Christoph Lameter * Cleaned up and restructured to ease the addition of alternative * implementations of SLAB allocators. */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 71e43a12ebbb..d117ea2825a9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -4,7 +4,7 @@ /* * SLUB : A Slab allocator without object queues. * - * (C) 2007 SGI, Christoph Lameter <[email protected]> + * (C) 2007 SGI, Christoph Lameter */ #include <linux/types.h> #include <linux/gfp.h> @@ -137,10 +137,12 @@ static __always_inline int kmalloc_index(size_t size) if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; +#if KMALLOC_MIN_SIZE <= 64 if (size > 64 && size <= 96) return 1; if (size > 128 && size <= 192) return 2; +#endif if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 06d3e6eb9ca8..917707e6151d 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -66,8 +66,7 @@ struct thermal_cooling_device { ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) -#if defined(CONFIG_HWMON) || \ - (defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE)) +#if defined(CONFIG_THERMAL_HWMON) /* thermal zone devices with the same type share one hwmon device */ struct thermal_hwmon_device { char type[THERMAL_NAME_LENGTH]; @@ -94,8 +93,7 @@ struct thermal_zone_device { struct idr idr; struct mutex lock; /* protect cooling devices list */ struct list_head node; -#if defined(CONFIG_HWMON) || \ - (defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE)) +#if defined(CONFIG_THERMAL_HWMON) struct list_head hwmon_node; struct thermal_hwmon_device *hwmon; struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ diff --git a/include/media/cx25840.h b/include/media/cx25840.h index cd599ad29fb2..db431d513f2f 100644 --- a/include/media/cx25840.h +++ b/include/media/cx25840.h @@ -32,12 +32,16 @@ enum cx25840_video_input { CX25840_COMPOSITE7, CX25840_COMPOSITE8, - /* S-Video inputs consist of one luma input (In1-In4) ORed with one + /* S-Video inputs consist of one luma input (In1-In8) ORed with one chroma input (In5-In8) */ CX25840_SVIDEO_LUMA1 = 0x10, CX25840_SVIDEO_LUMA2 = 0x20, CX25840_SVIDEO_LUMA3 = 0x30, CX25840_SVIDEO_LUMA4 = 0x40, + CX25840_SVIDEO_LUMA5 = 0x50, + CX25840_SVIDEO_LUMA6 = 0x60, + CX25840_SVIDEO_LUMA7 = 0x70, + CX25840_SVIDEO_LUMA8 = 0x80, CX25840_SVIDEO_CHROMA4 = 0x400, CX25840_SVIDEO_CHROMA5 = 0x500, CX25840_SVIDEO_CHROMA6 = 0x600, diff --git a/include/media/ir-common.h b/include/media/ir-common.h index bfee8be5d63f..b8e8aa91905a 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -146,6 +146,7 @@ extern IR_KEYTAB_TYPE ir_codes_behold_columbus[IR_KEYTAB_SIZE]; extern IR_KEYTAB_TYPE ir_codes_pinnacle_pctv_hd[IR_KEYTAB_SIZE]; extern IR_KEYTAB_TYPE ir_codes_genius_tvgo_a11mce[IR_KEYTAB_SIZE]; extern IR_KEYTAB_TYPE ir_codes_powercolor_real_angel[IR_KEYTAB_SIZE]; +extern IR_KEYTAB_TYPE ir_codes_avermedia_a16d[IR_KEYTAB_SIZE]; #endif diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h index 33f01ae08f76..859f7a6f6f67 100644 --- a/include/media/v4l2-dev.h +++ b/include/media/v4l2-dev.h @@ -40,9 +40,9 @@ #define VFL_TYPE_VTX 3 /* Video standard functions */ -extern char *v4l2_norm_to_name(v4l2_std_id id); +extern const char *v4l2_norm_to_name(v4l2_std_id id); extern int v4l2_video_std_construct(struct v4l2_standard *vs, - int id, char *name); + int id, const char *name); /* Prints the ioctl in a human-readable format */ extern void v4l_printk_ioctl(unsigned int cmd); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dae3f9ec1154..bcd1623245cb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -595,6 +595,15 @@ enum ieee80211_key_alg { ALG_CCMP, }; +/** + * enum ieee80211_key_len - key length + * @WEP40: WEP 5 byte long key + * @WEP104: WEP 13 byte long key + */ +enum ieee80211_key_len { + LEN_WEP40 = 5, + LEN_WEP104 = 13, +}; /** * enum ieee80211_key_flags - key flags diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ab502ec1c61c..a87fc0312edc 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -178,7 +178,7 @@ extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops); extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, u32 parentid); extern void tcf_destroy(struct tcf_proto *tp); -extern void tcf_destroy_chain(struct tcf_proto *fl); +extern void tcf_destroy_chain(struct tcf_proto **fl); static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff_head *list) diff --git a/include/xen/events.h b/include/xen/events.h index acd8e062c85f..67c4436554a9 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); int resend_irq_on_evtchn(unsigned int irq); +void rebind_evtchn_irq(int evtchn, int irq); static inline void notify_remote_via_evtchn(int port) { @@ -40,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port) } extern void notify_remote_via_irq(int irq); + +extern void xen_irq_resume(void); + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 466204846121..a40f1cd91be1 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -51,6 +51,9 @@ struct gnttab_free_callback { u16 count; }; +int gnttab_suspend(void); +int gnttab_resume(void); + int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly); diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index 21c0ecfd786d..98b79bc404dd 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,4 +3,13 @@ extern struct console xenboot_console; +#ifdef CONFIG_HVC_XEN +void xen_console_resume(void); +#else +static inline void xen_console_resume(void) { } +#endif + +void xen_raw_console_write(const char *str); +void xen_raw_printk(const char *fmt, ...); + #endif /* XEN_HVC_CONSOLE_H */ diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index a64d3df5bd95..7a8262c375cc 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -120,6 +120,26 @@ */ #define XEN_ELFNOTE_BSD_SYMTAB 11 +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index d73228d16488..f51b6413b054 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -38,6 +38,9 @@ */ #define XENFEAT_pae_pgdir_above_4gb 4 +/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ +#define XENFEAT_mmu_pt_update_preserve_ad 5 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h index 5a934dd7796d..974a51ed9165 100644 --- a/include/xen/interface/io/fbif.h +++ b/include/xen/interface/io/fbif.h @@ -49,11 +49,27 @@ struct xenfb_update { int32_t height; /* rect height */ }; +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize { + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* start offset within framebuffer */ +}; + #define XENFB_OUT_EVENT_SIZE 40 union xenfb_out_event { uint8_t type; struct xenfb_update update; + struct xenfb_resize resize; char pad[XENFB_OUT_EVENT_SIZE]; }; @@ -105,15 +121,18 @@ struct xenfb_page { * Each directory page holds PAGE_SIZE / sizeof(*pd) * framebuffer pages, and can thus map up to PAGE_SIZE * * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and - * sizeof(unsigned long) == 4, that's 4 Megs. Two directory - * pages should be enough for a while. + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 + * Megs 64 bit. 256 directories give enough room for a 512 + * Meg framebuffer with a max resolution of 12,800x10,240. + * Should be enough for a while with room leftover for + * expansion. */ - unsigned long pd[2]; + unsigned long pd[256]; }; /* - * Wart: xenkbd needs to know resolution. Put it here until a better - * solution is found, but don't leak it to the backend. + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. */ #ifdef __KERNEL__ #define XENFB_WIDTH 800 diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index fb97f4284ffd..8066c7849fbe 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -49,6 +49,7 @@ struct xenkbd_motion { uint8_t type; /* XENKBD_TYPE_MOTION */ int32_t rel_x; /* relative X motion */ int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ }; struct xenkbd_key { @@ -61,6 +62,7 @@ struct xenkbd_position { uint8_t type; /* XENKBD_TYPE_POS */ int32_t abs_x; /* absolute X position (in FB pixels) */ int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index da768469aa92..af36ead16817 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -29,7 +29,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -50,6 +50,7 @@ struct xen_memory_reservation { domid_t domid; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); /* * Returns the maximum machine frame number of mapped RAM in this system. @@ -85,7 +86,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -93,6 +94,7 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* * Sets the GPFN at which a particular page appears in the specified guest's @@ -115,6 +117,7 @@ struct xen_add_to_physmap { /* GPFN where the source mapping page should appear. */ unsigned long gpfn; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); /* * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error @@ -129,13 +132,14 @@ struct xen_translate_gpfn_list { unsigned long nr_gpfns; /* List of GPFNs to translate. */ - ulong gpfn_list; + GUEST_HANDLE(ulong) gpfn_list; /* * Output list to contain MFN translations. May be the same as the input * list (in which case each input GPFN is overwritten with the output MFN). */ - ulong mfn_list; + GUEST_HANDLE(ulong) mfn_list; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); #endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 819a0331cda9..2befa3e2f1bc 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -114,9 +114,14 @@ * ptr[:2] -- Machine address within the frame whose mapping to modify. * The frame must belong to the FD, if one is specified. * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ /* * MMU EXTENDED OPERATIONS diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 10ddfe0142d0..a706d6a78960 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,4 +5,10 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +void xen_pre_suspend(void); +void xen_post_suspend(int suspend_cancelled); + +void xen_mm_pin_all(void); +void xen_mm_unpin_all(void); + #endif /* INCLUDE_XEN_OPS_H */ diff --git a/init/calibrate.c b/init/calibrate.c index ecb3822d4f70..7963e3fc51d9 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -8,7 +8,9 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/timex.h> +#include <linux/smp.h> +unsigned long lpj_fine; unsigned long preset_lpj; static int __init lpj_setup(char *str) { @@ -33,9 +35,9 @@ static unsigned long __cpuinit calibrate_delay_direct(void) unsigned long pre_start, start, post_start; unsigned long pre_end, end, post_end; unsigned long start_jiffies; - unsigned long tsc_rate_min, tsc_rate_max; - unsigned long good_tsc_sum = 0; - unsigned long good_tsc_count = 0; + unsigned long timer_rate_min, timer_rate_max; + unsigned long good_timer_sum = 0; + unsigned long good_timer_count = 0; int i; if (read_current_timer(&pre_start) < 0 ) @@ -79,22 +81,24 @@ static unsigned long __cpuinit calibrate_delay_direct(void) } read_current_timer(&post_end); - tsc_rate_max = (post_end - pre_start) / DELAY_CALIBRATION_TICKS; - tsc_rate_min = (pre_end - post_start) / DELAY_CALIBRATION_TICKS; + timer_rate_max = (post_end - pre_start) / + DELAY_CALIBRATION_TICKS; + timer_rate_min = (pre_end - post_start) / + DELAY_CALIBRATION_TICKS; /* - * If the upper limit and lower limit of the tsc_rate is + * If the upper limit and lower limit of the timer_rate is * >= 12.5% apart, redo calibration. */ if (pre_start != 0 && pre_end != 0 && - (tsc_rate_max - tsc_rate_min) < (tsc_rate_max >> 3)) { - good_tsc_count++; - good_tsc_sum += tsc_rate_max; + (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) { + good_timer_count++; + good_timer_sum += timer_rate_max; } } - if (good_tsc_count) - return (good_tsc_sum/good_tsc_count); + if (good_timer_count) + return (good_timer_sum/good_timer_count); printk(KERN_WARNING "calibrate_delay_direct() failed to get a good " "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n"); @@ -108,6 +112,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} * This is the number of bits of precision for the loops_per_jiffy. Each * bit takes on average 1.5/HZ seconds. This (like the original) is a little * better than 1% + * For the boot cpu we can skip the delay calibration and assign it a value + * calculated based on the timer frequency. + * For the rest of the CPUs we cannot assume that the timer frequency is same as + * the cpu frequency, hence do the calibration for those. */ #define LPS_PREC 8 @@ -118,20 +126,20 @@ void __cpuinit calibrate_delay(void) if (preset_lpj) { loops_per_jiffy = preset_lpj; - printk("Calibrating delay loop (skipped)... " - "%lu.%02lu BogoMIPS preset\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100); + printk(KERN_INFO + "Calibrating delay loop (skipped) preset value.. "); + } else if ((smp_processor_id() == 0) && lpj_fine) { + loops_per_jiffy = lpj_fine; + printk(KERN_INFO + "Calibrating delay loop (skipped), " + "value calculated using timer frequency.. "); } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { - printk("Calibrating delay using timer specific routine.. "); - printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, - loops_per_jiffy); + printk(KERN_INFO + "Calibrating delay using timer specific routine.. "); } else { loops_per_jiffy = (1<<12); - printk(KERN_DEBUG "Calibrating delay loop... "); + printk(KERN_INFO "Calibrating delay loop... "); while ((loops_per_jiffy <<= 1) != 0) { /* wait for "start of" clock tick */ ticks = jiffies; @@ -161,12 +169,8 @@ void __cpuinit calibrate_delay(void) if (jiffies != ticks) /* longer than 1 tick */ loops_per_jiffy &= ~loopbit; } - - /* Round the value and print it */ - printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, - loops_per_jiffy); } - + printk(KERN_INFO "%lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); } diff --git a/kernel/audit.c b/kernel/audit.c index e8692a5748c2..e092f1c0ce30 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -738,7 +738,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; - err = audit_filter_user(&NETLINK_CB(skb), msg_type); + err = audit_filter_user(&NETLINK_CB(skb)); if (err == 1) { err = 0; if (msg_type == AUDIT_USER_TTY) { @@ -779,7 +779,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST: - err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -798,7 +798,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST_RULES: - err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 0e0bd27e6512..98c50cc671bb 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1544,6 +1544,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, * @data: payload data * @datasz: size of payload data * @loginuid: loginuid of sender + * @sessionid: sessionid for netlink audit message * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, @@ -1720,7 +1721,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, return 1; } -int audit_filter_user(struct netlink_skb_parms *cb, int type) +int audit_filter_user(struct netlink_skb_parms *cb) { enum audit_state state = AUDIT_DISABLED; struct audit_entry *e; diff --git a/kernel/capability.c b/kernel/capability.c index cfbe44299488..901e0fdc3fff 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -121,6 +121,27 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) * uninteresting and/or not to be changed. */ +/* + * Atomically modify the effective capabilities returning the original + * value. No permission check is performed here - it is assumed that the + * caller is permitted to set the desired effective capabilities. + */ +kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) +{ + kernel_cap_t pE_old; + + spin_lock(&task_capability_lock); + + pE_old = current->cap_effective; + current->cap_effective = pE_new; + + spin_unlock(&task_capability_lock); + + return pE_old; +} + +EXPORT_SYMBOL(cap_set_effective); + /** * sys_capget - get the capabilities of a given process. * @header: pointer to struct that contains capability version and diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5fe5cc7..ab80515008f4 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1003,10 +1003,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) */ raise = timer->state == HRTIMER_STATE_PENDING; + /* + * We use preempt_disable to prevent this task from migrating after + * setting up the softirq and raising it. Otherwise, if me migrate + * we will raise the softirq on the wrong CPU. + */ + preempt_disable(); + unlock_hrtimer_base(timer, &flags); if (raise) hrtimer_raise_softirq(); + preempt_enable(); return ret; } diff --git a/kernel/printk.c b/kernel/printk.c index 8fb01c32aa3b..1fb1382009f3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -75,6 +75,8 @@ EXPORT_SYMBOL(oops_in_progress); static DECLARE_MUTEX(console_sem); static DECLARE_MUTEX(secondary_console_sem); struct console *console_drivers; +EXPORT_SYMBOL_GPL(console_drivers); + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's @@ -121,6 +123,8 @@ struct console_cmdline static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int selected_console = -1; static int preferred_console = -1; +int console_set_on_cmdline; +EXPORT_SYMBOL(console_set_on_cmdline); /* Flag: console code may call schedule() */ static int console_may_schedule; @@ -890,6 +894,7 @@ static int __init console_setup(char *str) *s = 0; __add_preferred_console(buf, idx, options, brl_options); + console_set_on_cmdline = 1; return 1; } __setup("console=", console_setup); diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index f4ffbd0f306f..a38895a5b8e2 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -89,8 +89,22 @@ static void force_quiescent_state(struct rcu_data *rdp, /* * Don't send IPI to itself. With irqs disabled, * rdp->cpu is the current cpu. + * + * cpu_online_map is updated by the _cpu_down() + * using stop_machine_run(). Since we're in irqs disabled + * section, stop_machine_run() is not exectuting, hence + * the cpu_online_map is stable. + * + * However, a cpu might have been offlined _just_ before + * we disabled irqs while entering here. + * And rcu subsystem might not yet have handled the CPU_DEAD + * notification, leading to the offlined cpu's bit + * being set in the rcp->cpumask. + * + * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent + * sending smp_reschedule() to an offlined CPU. */ - cpumask = rcp->cpumask; + cpus_and(cpumask, rcp->cpumask, cpu_online_map); cpu_clear(rdp->cpu, cpumask); for_each_cpu_mask(cpu, cpumask) smp_send_reschedule(cpu); diff --git a/kernel/sched.c b/kernel/sched.c index 3aaa5c8cb421..bcc22b569ee9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5887,6 +5887,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu) next = pick_next_task(rq, rq->curr); if (!next) break; + next->sched_class->put_prev_task(rq, next); migrate_dead(dead_cpu, next); } @@ -6537,9 +6538,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) min_val = INT_MAX; - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { /* Start at @node */ - n = (node + i) % MAX_NUMNODES; + n = (node + i) % nr_node_ids; if (!nr_cpus_node(n)) continue; @@ -6733,7 +6734,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) if (!sched_group_nodes) continue; - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; *nodemask = node_to_cpumask(i); @@ -6926,7 +6927,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, /* * Allocate the per-node list of sched groups */ - sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), + sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *), GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); @@ -7065,7 +7066,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #endif /* Set up physical groups */ - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { SCHED_CPUMASK_VAR(nodemask, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); @@ -7089,7 +7090,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, send_covered, tmpmask); } - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; SCHED_CPUMASK_VAR(nodemask, allmasks); @@ -7128,9 +7129,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map, cpus_or(*covered, *covered, *nodemask); prev = sg; - for (j = 0; j < MAX_NUMNODES; j++) { + for (j = 0; j < nr_node_ids; j++) { SCHED_CPUMASK_VAR(notcovered, allmasks); - int n = (i + j) % MAX_NUMNODES; + int n = (i + j) % nr_node_ids; node_to_cpumask_ptr(pnodemask, n); cpus_complement(*notcovered, *covered); @@ -7183,7 +7184,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, } #ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) + for (i = 0; i < nr_node_ids; i++) init_numa_sched_groups_power(sched_group_nodes[i]); if (sd_allnodes) { @@ -8501,6 +8502,9 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) rt_period = (u64)rt_period_us * NSEC_PER_USEC; rt_runtime = tg->rt_bandwidth.rt_runtime; + if (rt_period == 0) + return -EINVAL; + return tg_set_bandwidth(tg, rt_period, rt_runtime); } diff --git a/kernel/softlockup.c b/kernel/softlockup.c index c828c2339cc9..a272d78185eb 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -120,6 +120,7 @@ void softlockup_tick(void) printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", this_cpu, now - touch_timestamp, current->comm, task_pid_nr(current)); + print_modules(); if (regs) show_regs(regs); else diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 57a1f02e5ec0..67f80c261709 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -30,6 +30,7 @@ struct tick_device tick_broadcast_device; static cpumask_t tick_broadcast_mask; static DEFINE_SPINLOCK(tick_broadcast_lock); +static int tick_broadcast_force; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); @@ -232,10 +233,11 @@ static void tick_do_broadcast_on_off(void *why) CLOCK_EVT_MODE_SHUTDOWN); } if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) - dev->features |= CLOCK_EVT_FEAT_DUMMY; + tick_broadcast_force = 1; break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (cpu_isset(cpu, tick_broadcast_mask)) { + if (!tick_broadcast_force && + cpu_isset(cpu, tick_broadcast_mask)) { cpu_clear(cpu, tick_broadcast_mask); if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 29fc39f1029c..ce7799540c91 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -13,7 +13,7 @@ * Kai Petzke <[email protected]> * Theodore Ts'o <[email protected]> * - * Made to use alloc_percpu by Christoph Lameter <[email protected]>. + * Made to use alloc_percpu by Christoph Lameter. */ #include <linux/module.h> diff --git a/lib/bug.c b/lib/bug.c index 530f38f55787..bfeafd60ee9f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -37,6 +37,7 @@ */ #include <linux/list.h> #include <linux/module.h> +#include <linux/kernel.h> #include <linux/bug.h> #include <linux/sched.h> @@ -149,6 +150,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) (void *)bugaddr); show_regs(regs); + add_taint(TAINT_WARN); return BUG_TRAP_TYPE_WARN; } diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a76a5e122ae1..85b18d79be89 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -68,6 +68,7 @@ static int fill_pool(void) { gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; struct debug_obj *new; + unsigned long flags; if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) return obj_pool_free; @@ -81,10 +82,10 @@ static int fill_pool(void) if (!new) return obj_pool_free; - spin_lock(&pool_lock); + spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); obj_pool_free++; - spin_unlock(&pool_lock); + spin_unlock_irqrestore(&pool_lock, flags); } return obj_pool_free; } @@ -110,16 +111,13 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) } /* - * Allocate a new object. If the pool is empty and no refill possible, - * switch off the debugger. + * Allocate a new object. If the pool is empty, switch off the debugger. */ static struct debug_obj * alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) { struct debug_obj *obj = NULL; - int retry = 0; -repeat: spin_lock(&pool_lock); if (obj_pool.first) { obj = hlist_entry(obj_pool.first, typeof(*obj), node); @@ -141,9 +139,6 @@ repeat: } spin_unlock(&pool_lock); - if (fill_pool() && !obj && !retry++) - goto repeat; - return obj; } @@ -261,6 +256,8 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) struct debug_obj *obj; unsigned long flags; + fill_pool(); + db = get_bucket((unsigned long) addr); spin_lock_irqsave(&db->lock, flags); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 169a2f8dabcc..56ec21a7f73d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig - * Copyright (C) 2005 SGI, Christoph Lameter <[email protected]> + * Copyright (C) 2005 SGI, Christoph Lameter * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or diff --git a/lib/ts_bm.c b/lib/ts_bm.c index d90822c378a4..4a7fce72898e 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -63,7 +63,7 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) struct ts_bm *bm = ts_config_priv(conf); unsigned int i, text_len, consumed = state->offset; const u8 *text; - int shift = bm->patlen, bs; + int shift = bm->patlen - 1, bs; for (;;) { text_len = conf->get_next_block(consumed, &text, conf, state); diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index f4026bae6eed..05f2b4009ccc 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -1,7 +1,7 @@ /* * linux/mm/allocpercpu.c * - * Separated from slab.c August 11, 2006 Christoph Lameter <[email protected]> + * Separated from slab.c August 11, 2006 Christoph Lameter */ #include <linux/mm.h> #include <linux/module.h> diff --git a/mm/memory.c b/mm/memory.c index d14b251a25a6..2302d228fe04 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1151,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * be processed until returning to user space. */ if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE))) - return -ENOMEM; + return i ? i : -ENOMEM; if (write) foll_flags |= FOLL_WRITE; @@ -1697,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); - if (!old_page) + if (!old_page) { + /* + * VM_MIXEDMAP !pfn_valid() case + * + * We should not cow pages in a shared writeable mapping. + * Just mark the pages writable as we can't do any dirty + * accounting on raw pfn maps. + */ + if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED)) + goto reuse; goto gotten; + } /* * Take out anonymous pages first, anonymous shared vmas are @@ -1751,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, } if (reuse) { +reuse: flush_cache_page(vma, address, pte_pfn(orig_pte)); entry = pte_mkyoung(orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a37a5034f63d..c94e58b192c3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, } else { *policy = pol == &default_policy ? MPOL_DEFAULT : pol->mode; - *policy |= pol->flags; + /* + * Internal mempolicy flags must be masked off before exposing + * the policy to userspace. + */ + *policy |= (pol->flags & MPOL_MODE_FLAGS); } if (vma) { diff --git a/mm/migrate.c b/mm/migrate.c index 112bcaeaa104..55bd355d170d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -9,7 +9,7 @@ * IWAMOTO Toshihiro <[email protected]> * Hirokazu Takahashi <[email protected]> * Dave Hansen <[email protected]> - * Christoph Lameter <[email protected]> + * Christoph Lameter */ #include <linux/migrate.h> diff --git a/mm/mprotect.c b/mm/mprotect.c index a5bf31c27375..acfe7c8d72fc 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -47,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, if (pte_present(oldpte)) { pte_t ptent; - /* Avoid an SMP race with hardware updated dirty/clean - * bits by wiping the pte and then setting the new pte - * into place. - */ - ptent = ptep_get_and_clear(mm, addr, pte); + ptent = ptep_modify_prot_start(mm, addr, pte); ptent = pte_modify(ptent, newprot); + /* * Avoid taking write faults for pages we know to be * dirty. */ if (dirty_accountable && pte_dirty(ptent)) ptent = pte_mkwrite(ptent); - set_pte_at(mm, addr, pte, ptent); + + ptep_modify_prot_commit(mm, addr, pte, ptent); #ifdef CONFIG_MIGRATION } else if (!pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2f552955a02f..f024b9b3a2a6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2328,7 +2328,6 @@ static void build_zonelists(pg_data_t *pgdat) static void build_zonelist_cache(pg_data_t *pgdat) { pgdat->node_zonelists[0].zlcache_ptr = NULL; - pgdat->node_zonelists[1].zlcache_ptr = NULL; } #endif /* CONFIG_NUMA */ @@ -2930,6 +2929,18 @@ void __init free_bootmem_with_active_regions(int nid, } } +void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) +{ + int i; + int ret; + + for_each_active_range_index_in_nid(i, nid) { + ret = work_fn(early_node_map[i].start_pfn, + early_node_map[i].end_pfn, data); + if (ret) + break; + } +} /** * sparse_memory_present_with_active_regions - Call memory_present for each active range * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. @@ -3462,6 +3473,11 @@ void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat, calculate_node_totalpages(pgdat, zones_size, zholes_size); alloc_node_mem_map(pgdat); +#ifdef CONFIG_FLAT_NODE_MEM_MAP + printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n", + nid, (unsigned long)pgdat, + (unsigned long)pgdat->node_mem_map); +#endif free_area_init_core(pgdat, zones_size, zholes_size); } @@ -3504,7 +3520,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, { int i; - printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) " + printk(KERN_DEBUG "Entering add_active_range(%d, %#lx, %#lx) " "%d entries of %d used\n", nid, start_pfn, end_pfn, nr_nodemap_entries, MAX_ACTIVE_REGIONS); @@ -3548,27 +3564,68 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, } /** - * shrink_active_range - Shrink an existing registered range of PFNs + * remove_active_range - Shrink an existing registered range of PFNs * @nid: The node id the range is on that should be shrunk - * @old_end_pfn: The old end PFN of the range - * @new_end_pfn: The new PFN of the range + * @start_pfn: The new PFN of the range + * @end_pfn: The new PFN of the range * * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. - * The map is kept at the end physical page range that has already been - * registered with add_active_range(). This function allows an arch to shrink - * an existing registered range. + * The map is kept near the end physical page range that has already been + * registered. This function allows an arch to shrink an existing registered + * range. */ -void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, - unsigned long new_end_pfn) +void __init remove_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn) { - int i; + int i, j; + int removed = 0; + + printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n", + nid, start_pfn, end_pfn); /* Find the old active region end and shrink */ - for_each_active_range_index_in_nid(i, nid) - if (early_node_map[i].end_pfn == old_end_pfn) { - early_node_map[i].end_pfn = new_end_pfn; - break; + for_each_active_range_index_in_nid(i, nid) { + if (early_node_map[i].start_pfn >= start_pfn && + early_node_map[i].end_pfn <= end_pfn) { + /* clear it */ + early_node_map[i].start_pfn = 0; + early_node_map[i].end_pfn = 0; + removed = 1; + continue; + } + if (early_node_map[i].start_pfn < start_pfn && + early_node_map[i].end_pfn > start_pfn) { + unsigned long temp_end_pfn = early_node_map[i].end_pfn; + early_node_map[i].end_pfn = start_pfn; + if (temp_end_pfn > end_pfn) + add_active_range(nid, end_pfn, temp_end_pfn); + continue; } + if (early_node_map[i].start_pfn >= start_pfn && + early_node_map[i].end_pfn > end_pfn && + early_node_map[i].start_pfn < end_pfn) { + early_node_map[i].start_pfn = end_pfn; + continue; + } + } + + if (!removed) + return; + + /* remove the blank ones */ + for (i = nr_nodemap_entries - 1; i > 0; i--) { + if (early_node_map[i].nid != nid) + continue; + if (early_node_map[i].end_pfn) + continue; + /* we found it, get rid of it */ + for (j = i; j < nr_nodemap_entries - 1; j++) + memcpy(&early_node_map[j], &early_node_map[j+1], + sizeof(early_node_map[j])); + j = nr_nodemap_entries - 1; + memset(&early_node_map[j], 0, sizeof(early_node_map[j])); + nr_nodemap_entries--; + } } /** @@ -3612,7 +3669,7 @@ static void __init sort_node_map(void) } /* Find the lowest pfn for a node */ -unsigned long __init find_min_pfn_for_node(unsigned long nid) +unsigned long __init find_min_pfn_for_node(int nid) { int i; unsigned long min_pfn = ULONG_MAX; @@ -3623,7 +3680,7 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid) if (min_pfn == ULONG_MAX) { printk(KERN_WARNING - "Could not find start_pfn for node %lu\n", nid); + "Could not find start_pfn for node %d\n", nid); return 0; } @@ -3879,7 +3936,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; - printk(" %-8s %8lu -> %8lu\n", + printk(" %-8s %0#10lx -> %0#10lx\n", zone_names[i], arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); @@ -3895,7 +3952,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Print out the early_node_map[] */ printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); for (i = 0; i < nr_nodemap_entries; i++) - printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid, + printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); diff --git a/mm/slub.c b/mm/slub.c index 0987d1cd943c..1a427c0ae83b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5,7 +5,7 @@ * The allocator synchronizes using per slab locks and only * uses a centralized lock to manage a pool of partial slabs. * - * (C) 2007 SGI, Christoph Lameter <[email protected]> + * (C) 2007 SGI, Christoph Lameter */ #include <linux/mm.h> @@ -2995,8 +2995,6 @@ void __init kmem_cache_init(void) create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_KERNEL); caches++; - } - if (KMALLOC_MIN_SIZE <= 128) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_KERNEL); caches++; @@ -3026,6 +3024,16 @@ void __init kmem_cache_init(void) for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + if (KMALLOC_MIN_SIZE == 128) { + /* + * The 192 byte sized cache is not used if the alignment + * is 128 byte. Redirect kmalloc to use the 256 byte cache + * instead. + */ + for (i = 128 + 8; i <= 192; i += 8) + size_index[(i - 1) / 8] = 8; + } + slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 99c4f36eb8a3..a91b5f8fcaf6 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -1,7 +1,7 @@ /* * Virtual Memory Map support * - * (C) 2007 sgi. Christoph Lameter <[email protected]>. + * (C) 2007 sgi. Christoph Lameter. * * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn, * virt_to_page, page_address() to be implemented as a base offset diff --git a/net/core/dev.c b/net/core/dev.c index c421a1f8f0b9..fca23a3bf12c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -454,7 +454,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { memset(s[i].name, 0, sizeof(s[i].name)); - strcpy(s[i].name, name); + strlcpy(s[i].name, name, IFNAMSIZ); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } @@ -479,7 +479,7 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strncmp(dev->name, s[i].name, strlen(s[i].name))) { + !strcmp(dev->name, s[i].name)) { dev->irq = s[i].map.irq; dev->base_addr = s[i].map.base_addr; dev->mem_start = s[i].map.mem_start; @@ -2973,7 +2973,7 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device - * @addr: address to delete + * @addr: address to add * @alen: length of @addr * * Add a secondary unicast address to the device or increase diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e3e9ab0f74e3..277a2302eb3a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } @@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } diff --git a/net/core/filter.c b/net/core/filter.c index 4f8369729a4e..df3744355839 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. * * Run the filter code and then cut skb->data to correct size returned by * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e556d312117..366621610e76 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1292,12 +1292,14 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, { unsigned int nr_pages = spd->nr_pages; unsigned int poff, plen, len, toff, tlen; - int headlen, seg; + int headlen, seg, error = 0; toff = *offset; tlen = *total_len; - if (!tlen) + if (!tlen) { + error = 1; goto err; + } /* * if the offset is greater than the linear part, go directly to @@ -1339,7 +1341,8 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, * just jump directly to update and return, no point * in going over fragments when the output is full. */ - if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) + error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb); + if (error) goto done; tlen -= plen; @@ -1369,7 +1372,8 @@ map_frag: if (!plen) break; - if (spd_fill_page(spd, f->page, plen, poff, skb)) + error = spd_fill_page(spd, f->page, plen, poff, skb); + if (error) break; tlen -= plen; @@ -1382,7 +1386,10 @@ done: return 0; } err: - return 1; + /* update the offset to reflect the linear part skip, if any */ + if (!error) + *offset = toff; + return error; } /* diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4ed429bd5951..0546a0bc97ea 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, - unsigned int hash, void *arg) + void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif + unsigned int hash; write_lock(&f->lock); + /* + * While we stayed w/o the lock other CPU could update + * the rnd seed, so we need to re-calculate the hash + * chain. Fortunatelly the qp_in can be used to get one. + */ + hash = f->hashfn(qp_in); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we @@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg, unsigned int hash) + struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, if (q == NULL) return NULL; - return inet_frag_intern(nf, q, f, hash, arg); + return inet_frag_intern(nf, q, f, arg); } struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, @@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; struct hlist_node *n; - read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); @@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, } read_unlock(&f->lock); - return inet_frag_create(nf, f, key, hash); + return inet_frag_create(nf, f, key); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 4a4d49fca1f2..cfd034a2b96e 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, out2: /* send aggregated SKBs to stack */ lro_flush(lro_mgr, lro_desc); -out: /* Original SKB has to be posted to stack */ - skb->ip_summed = lro_mgr->ip_summed; +out: return 1; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cd6ce6ac6358..37221f659159 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -229,6 +229,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; + + read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fc54a48fde1e..850825dc86e6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -260,6 +260,8 @@ #include <linux/socket.h> #include <linux/random.h> #include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/swap.h> #include <linux/cache.h> #include <linux/err.h> #include <linux/crypto.h> @@ -2620,7 +2622,7 @@ __setup("thash_entries=", set_thash_entries); void __init tcp_init(void) { struct sk_buff *skb = NULL; - unsigned long limit; + unsigned long nr_pages, limit; int order, i, max_share; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2689,8 +2691,9 @@ void __init tcp_init(void) * is up to 1/2 at 256 MB, decreasing toward zero with the amount of * memory, with a floor of 128 pages. */ - limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 12695be2c255..ffe869ac1bcf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2291,7 +2291,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) } seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %p %u %u %u %u %d%n", + "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : @@ -2303,8 +2303,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 27a5e8b48d93..f405cea21a8b 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -129,7 +129,7 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e65e26e210ee..cf20bc4fd60d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -207,9 +207,10 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.id = id; arg.src = src; arg.dst = dst; + + read_lock_bh(&nf_frags.lock); hash = ip6qhashfn(id, src, dst); - local_bh_disable(); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 798cabc7535b..a60d7d129713 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -247,6 +247,8 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.id = id; arg.src = src; arg.dst = dst; + + read_lock(&ip6_frags.lock); hash = ip6qhashfn(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d1f3e19b06c7..7ff687020fa9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -240,7 +240,7 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; @@ -253,7 +253,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -266,7 +266,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (local) return local; - if (strict) + if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } return rt; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cb46749d4c32..40ea9c36d24b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2036,7 +2036,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -2052,8 +2052,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 150d66dbda9d..220e83be3ef4 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -380,6 +380,15 @@ void ieee80211_key_free(struct ieee80211_key *key) if (!key) return; + if (!key->sdata) { + /* The key has not been linked yet, simply free it + * and don't Oops */ + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + kfree(key); + return; + } + spin_lock_irqsave(&key->sdata->local->key_lock, flags); __ieee80211_key_free(key); spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 6106cb79060c..e8404212ad57 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -95,6 +95,13 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, } } + if (alg == ALG_WEP && + key_len != LEN_WEP40 && key_len != LEN_WEP104) { + ieee80211_key_free(key); + err = -EINVAL; + goto out_unlock; + } + ieee80211_key_link(key, sdata, sta); if (set_tx_key || (!sta && !sdata->default_key && key)) diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 635b996c8c35..5d09e8698b57 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -323,8 +323,7 @@ static void wme_qdiscop_destroy(struct Qdisc* qd) struct ieee80211_hw *hw = &local->hw; int queue; - tcf_destroy_chain(q->filter_list); - q->filter_list = NULL; + tcf_destroy_chain(&q->filter_list); for (queue=0; queue < hw->queues; queue++) { skb_queue_purge(&q->requeued[queue]); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ba94004fe323..271cd01d57ae 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -331,12 +331,13 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) I. Upper bound for valid data: seq <= sender.td_maxend II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin - III. Upper bound for valid ack: sack <= receiver.td_end - IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW + III. Upper bound for valid (s)ack: sack <= receiver.td_end + IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW - where sack is the highest right edge of sack block found in the packet. + where sack is the highest right edge of sack block found in the packet + or ack in the case of packet without SACK option. - The upper bound limit for a valid ack is not ignored - + The upper bound limit for a valid (s)ack is not ignored - we doesn't have to deal with fragments. */ @@ -606,12 +607,12 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender))) { + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0099da5b2591..52b2611a6eb6 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1534,7 +1534,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, } } list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { - if (addr6->valid || iter_addr6++ < skip_addr6) + if (!addr6->valid || iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b97f8006c9c..349aba189558 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -886,7 +886,7 @@ retry: return netlink_unicast_kernel(sk, skb); if (sk_filter(sk, skb)) { - int err = skb->len; + err = skb->len; kfree_skb(skb); sock_put(sk); return err; diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 47bbf45ae5d7..2d106cfe1d27 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -132,6 +132,7 @@ errout: * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream + * @policy: validation policy * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessable via the attribute type. Attributes with a type @@ -194,7 +195,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) /** * nla_strlcpy - Copy string attribute payload into a sized buffer * @dst: where to copy the string to - * @src: attribute to copy the string from + * @nla: attribute to copy the string from * @dstsize: size of destination buffer * * Copies at most dstsize - 1 bytes into the destination buffer. @@ -340,9 +341,9 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) } /** - * nla_reserve - reserve room for attribute without header + * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on - * @len: length of attribute payload + * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 82adfe6447d7..9437b27ff84d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -106,17 +106,6 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. -config NET_SCH_RR - tristate "Multi Band Round Robin Queuing (RR)" - select NET_SCH_PRIO - ---help--- - Say Y here if you want to use an n-band round robin packet - scheduler. - - The module uses sch_prio for its framework and is aliased as - sch_rr, so it will load sch_prio, although it is referred - to using sch_rr. - config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c40773cdbe45..10f01ad04380 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1252,12 +1252,12 @@ void tcf_destroy(struct tcf_proto *tp) kfree(tp); } -void tcf_destroy_chain(struct tcf_proto *fl) +void tcf_destroy_chain(struct tcf_proto **fl) { struct tcf_proto *tp; - while ((tp = fl) != NULL) { - fl = tp->next; + while ((tp = *fl) != NULL) { + *fl = tp->next; tcf_destroy(tp); } } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 335273416384..db0e23ae85f8 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -160,7 +160,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) *prev = flow->next; pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); - tcf_destroy_chain(flow->filter_list); + tcf_destroy_chain(&flow->filter_list); if (flow->sock) { pr_debug("atm_tc_put: f_count %d\n", file_count(flow->sock->file)); @@ -586,10 +586,11 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + tcf_destroy_chain(&flow->filter_list); + /* races ? */ while ((flow = p->flows)) { - tcf_destroy_chain(flow->filter_list); - flow->filter_list = NULL; if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 09969c1fbc08..2a3c97f7dc63 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1704,7 +1704,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) BUG_TRAP(!cl->filters); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->bstats, &cl->rate_est); @@ -1728,10 +1728,8 @@ cbq_destroy(struct Qdisc* sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { - tcf_destroy_chain(cl->filter_list); - cl->filter_list = NULL; - } + for (cl = q->classes[h]; cl; cl = cl->next) + tcf_destroy_chain(&cl->filter_list); } for (h = 0; h < 16; h++) { struct cbq_class *next; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 64465bacbe79..c4c1317cd47d 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -416,7 +416,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); kfree(p->mask); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d355e5e47fe3..13afa7214392 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -468,7 +468,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) return sch; errout: - return ERR_PTR(-err); + return ERR_PTR(err); } struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index fdfaa3fcc16d..e817aa00441d 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1123,7 +1123,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->qdisc); gen_kill_estimator(&cl->bstats, &cl->rate_est); if (cl != &q->root) @@ -1541,6 +1541,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch) unsigned int i; for (i = 0; i < HFSC_HSIZE; i++) { + list_for_each_entry(cl, &q->clhash[i], hlist) + tcf_destroy_chain(&cl->filter_list); + } + for (i = 0; i < HFSC_HSIZE; i++) { list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) hfsc_destroy_class(sch, cl); } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 6807c97985a5..3fb58f428f72 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1238,7 +1238,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); while (!list_empty(&cl->children)) htb_destroy_class(sch, list_entry(cl->children.next, @@ -1267,7 +1267,7 @@ static void htb_destroy(struct Qdisc *sch) and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); while (!list_empty(&q->root)) htb_destroy_class(sch, list_entry(q->root.next, diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 274b1ddb160c..956c80ad5965 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4aa2b45dad0a..5532f1031ab5 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -219,7 +219,7 @@ prio_destroy(struct Qdisc* sch) int prio; struct prio_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); for (prio=0; prio<q->bands; prio++) qdisc_destroy(q->queues[prio]); } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f0463d757a98..6a97afbfb952 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5905d56737d6..81ae3d62a0cc 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1144,20 +1144,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: + /* placeholders for length and seq. number: */ + svc_putnl(resv, 0); + svc_putnl(resv, 0); if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + break; + case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - break; - case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; - /* placeholders for length and seq. number: */ - svc_putnl(resv, 0); - svc_putnl(resv, 0); break; default: goto auth_err; @@ -1170,8 +1170,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto out; } garbage_args: - /* Restore write pointer to its original value: */ - xdr_ressize_check(rqstp, reject_stat); ret = SVC_GARBAGE; goto out; auth_err: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 657835f227d3..783317dacd30 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -487,8 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); -static unsigned int unix_datagram_poll(struct file *, struct socket *, - poll_table *); +static unsigned int unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct kiocb *, struct socket *, @@ -534,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll = unix_datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -555,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll = unix_datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -1994,29 +1994,13 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl return mask; } -static unsigned int unix_datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) +static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) { - struct sock *sk = sock->sk, *peer; - unsigned int mask; + struct sock *sk = sock->sk, *other; + unsigned int mask, writable; poll_wait(file, sk->sk_sleep, wait); - - peer = unix_peer_get(sk); - if (peer) { - if (peer != sk) { - /* - * Writability of a connected socket additionally - * depends on the state of the receive queue of the - * peer. - */ - poll_wait(file, &unix_sk(peer)->peer_wait, wait); - } else { - sock_put(peer); - peer = NULL; - } - } - mask = 0; /* exceptional events? */ @@ -2042,14 +2026,26 @@ static unsigned int unix_datagram_poll(struct file *file, struct socket *sock, } /* writable? */ - if (unix_writable(sk) && !(peer && unix_recvq_full(peer))) + writable = unix_writable(sk); + if (writable) { + other = unix_peer_get(sk); + if (other) { + if (unix_peer(other) != sk) { + poll_wait(file, &unix_sk(other)->peer_wait, + wait); + if (unix_recvq_full(other)) + writable = 0; + } + + sock_put(other); + } + } + + if (writable) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - if (peer) - sock_put(peer); - return mask; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 185488da2466..855bff4b3250 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -80,6 +80,23 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { IEEE80211_CHAN_RADAR), }; +static const struct ieee80211_channel_range ieee80211_EU_channels[] = { + /* IEEE 802.11b/g, channels 1..13 */ + RANGE_PWR(2412, 2472, 20, 6, 0), + /* IEEE 802.11a, channel 36*/ + RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40*/ + RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44*/ + RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), + /* IEEE 802.11a, channels 100..140 */ + RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), +}; + #define REGDOM(_code) \ { \ .code = __stringify(_code), \ @@ -90,6 +107,7 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { static const struct ieee80211_regdomain ieee80211_regdoms[] = { REGDOM(US), REGDOM(JP), + REGDOM(EU), }; diff --git a/security/commoncap.c b/security/commoncap.c index 5edabc7542ae..33d343308413 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -103,10 +103,16 @@ static inline int cap_inh_is_capped(void) return (cap_capable(current, CAP_SETPCAP) != 0); } +static inline int cap_limit_ptraced_target(void) { return 1; } + #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ static inline int cap_block_setpcap(struct task_struct *t) { return 0; } static inline int cap_inh_is_capped(void) { return 1; } +static inline int cap_limit_ptraced_target(void) +{ + return !capable(CAP_SETPCAP); +} #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ @@ -342,9 +348,10 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) bprm->e_uid = current->uid; bprm->e_gid = current->gid; } - if (!capable (CAP_SETPCAP)) { - new_permitted = cap_intersect (new_permitted, - current->cap_permitted); + if (cap_limit_ptraced_target()) { + new_permitted = + cap_intersect(new_permitted, + current->cap_permitted); } } } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index baf348834b66..fd764a0858d0 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -382,6 +382,8 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, case 'a': wh.type = DEV_ALL; wh.access = ACC_MASK; + wh.major = ~0; + wh.minor = ~0; goto handle; case 'b': wh.type = DEV_BLOCK; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 1dcf9f3d1107..44589088941f 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -278,7 +278,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi) ent->fields.remote_irr = 0; if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) - ioapic_deliver(ioapic, gsi); + ioapic_service(ioapic, gsi); } void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) |