From a66f66c44d53a4bab4b6b2903fd271f13ce4101b Mon Sep 17 00:00:00 2001
From: Jesper Nilsson <jesper.nilsson@axis.com>
Date: Tue, 6 Nov 2007 08:40:24 +0000
Subject: [MTD] Provide mtdram.h with mtdram_init_device() prototype

This is used by axisflashmap.c to boot from ram.

Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Mikael Starvik <starvik@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/mtd/mtdram.h | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 include/linux/mtd/mtdram.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtdram.h b/include/linux/mtd/mtdram.h
new file mode 100644
index 000000000000..04fdc07b7353
--- /dev/null
+++ b/include/linux/mtd/mtdram.h
@@ -0,0 +1,8 @@
+#ifndef __MTD_MTDRAM_H__
+#define __MTD_MTDRAM_H__
+
+#include <linux/mtd/mtd.h>
+int mtdram_init_device(struct mtd_info *mtd, void *mapped_address,
+                       unsigned long size, char *name);
+
+#endif /* __MTD_MTDRAM_H__ */
-- 
cgit 


From 239665a3bb0a2234980f918913add31bc536cfd1 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 23 Nov 2007 20:08:02 -0500
Subject: ACPI: tables: complete searching upon RSDP w/ bad checksum.

ACPI tables follow a tree structure in memory.
The root of the tree is the RSDP (Root System Description Pointer).

To find the RSDP, the OS searches for the signature "RSD PTR "
in well known physical memory locations.  Then the OS computes
a table checksum to verify that the signature is really part
of a valid table header.

Some systems have a proper signature but an invalid checksum;
followed elsewhere by a proper signature with valid checksum.

http://bugzilla.kernel.org/show_bug.cgi?id=9444

The Linux RSDP scanning code bailed out on those systems
and as a result they booted with ACPI disabled.

Fix this by deleting the Linux RSDP scanning code and
plugging in the ACPICA RSDP scanning code.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/ia64/kernel/acpi.c        | 26 ++++++++++++++------------
 arch/x86/kernel/acpi/boot.c    | 40 ----------------------------------------
 arch/x86/kernel/srat_32.c      |  2 +-
 drivers/acpi/osl.c             |  8 ++++++--
 drivers/acpi/tables/Makefile   |  2 +-
 drivers/acpi/tables/tbxfroot.c |  4 +---
 include/linux/acpi.h           |  1 -
 7 files changed, 23 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 897e2083a3b1..63d6dcdc2e2a 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -69,6 +69,20 @@ unsigned int acpi_cpei_phys_cpuid;
 
 unsigned long acpi_wakeup_address = 0;
 
+#ifdef CONFIG_IA64_GENERIC
+static unsigned long __init acpi_find_rsdp(void)
+{
+	unsigned long rsdp_phys = 0;
+
+	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+		rsdp_phys = efi.acpi20;
+	else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+		printk(KERN_WARNING PREFIX
+		       "v1.0/r0.71 tables no longer supported\n");
+	return rsdp_phys;
+}
+#endif
+
 const char __init *
 acpi_get_sysname(void)
 {
@@ -631,18 +645,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 	return 0;
 }
 
-unsigned long __init acpi_find_rsdp(void)
-{
-	unsigned long rsdp_phys = 0;
-
-	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
-		rsdp_phys = efi.acpi20;
-	else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
-		printk(KERN_WARNING PREFIX
-		       "v1.0/r0.71 tables no longer supported\n");
-	return rsdp_phys;
-}
-
 int __init acpi_boot_init(void)
 {
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0ca27c7b0e8d..719c74b0141a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -581,25 +581,6 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
-static unsigned long __init
-acpi_scan_rsdp(unsigned long start, unsigned long length)
-{
-	unsigned long offset = 0;
-	unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
-	/*
-	 * Scan all 16-byte boundaries of the physical memory region for the
-	 * RSDP signature.
-	 */
-	for (offset = 0; offset < length; offset += 16) {
-		if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
-			continue;
-		return (start + offset);
-	}
-
-	return 0;
-}
-
 static int __init acpi_parse_sbf(struct acpi_table_header *table)
 {
 	struct acpi_table_boot *sb;
@@ -742,27 +723,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 	return 0;
 }
 
-unsigned long __init acpi_find_rsdp(void)
-{
-	unsigned long rsdp_phys = 0;
-
-	if (efi_enabled) {
-		if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
-			return efi.acpi20;
-		else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
-			return efi.acpi;
-	}
-	/*
-	 * Scan memory looking for the RSDP signature. First search EBDA (low
-	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
-	 */
-	rsdp_phys = acpi_scan_rsdp(0, 0x400);
-	if (!rsdp_phys)
-		rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
-
-	return rsdp_phys;
-}
-
 #ifdef	CONFIG_X86_LOCAL_APIC
 /*
  * Parse LAPIC entries in MADT
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 2a8713ec0f9a..b3b2c95f19d3 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -276,7 +276,7 @@ int __init get_memcfg_from_srat(void)
 	int tables = 0;
 	int i = 0;
 
-	rsdp_address = acpi_find_rsdp();
+	rsdp_address = acpi_os_get_root_pointer();
 	if (!rsdp_address) {
 		printk("%s: System description tables not found\n",
 		       __FUNCTION__);
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index aabc6ca4a81c..101691ef66cb 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -207,8 +207,12 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
 			       "System description tables not found\n");
 			return 0;
 		}
-	} else
-		return acpi_find_rsdp();
+	} else {
+		acpi_physical_address pa = 0;
+
+		acpi_find_root_pointer(&pa);
+		return pa;
+	}
 }
 
 void __iomem *acpi_os_map_memory(acpi_physical_address phys, acpi_size size)
diff --git a/drivers/acpi/tables/Makefile b/drivers/acpi/tables/Makefile
index 0a7d7afac255..7385efa61622 100644
--- a/drivers/acpi/tables/Makefile
+++ b/drivers/acpi/tables/Makefile
@@ -2,6 +2,6 @@
 # Makefile for all Linux ACPI interpreter subdirectories
 #
 
-obj-y := tbxface.o tbinstal.o  tbutils.o tbfind.o tbfadt.o
+obj-y := tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o
 
 EXTRA_CFLAGS += $(ACPI_CFLAGS)
diff --git a/drivers/acpi/tables/tbxfroot.c b/drivers/acpi/tables/tbxfroot.c
index cf8fa514189f..9ecb4b6c1e7d 100644
--- a/drivers/acpi/tables/tbxfroot.c
+++ b/drivers/acpi/tables/tbxfroot.c
@@ -100,7 +100,7 @@ static acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp)
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_tb_find_rsdp
+ * FUNCTION:    acpi_find_root_pointer
  *
  * PARAMETERS:  table_address           - Where the table pointer is returned
  *
@@ -219,8 +219,6 @@ acpi_status acpi_find_root_pointer(acpi_native_uint * table_address)
 	return_ACPI_STATUS(AE_NOT_FOUND);
 }
 
-ACPI_EXPORT_SYMBOL(acpi_find_root_pointer)
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_tb_scan_memory_for_rsdp
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 8ccedf7a0a5a..8deb61171558 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -79,7 +79,6 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table);
 typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end);
 
 char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
-unsigned long acpi_find_rsdp (void);
 int acpi_boot_init (void);
 int acpi_boot_table_init (void);
 int acpi_numa_init (void);
-- 
cgit 


From 393852ecfeec575ac78216b0eb58e4fd92f0816c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 6 Dec 2007 18:47:30 +0200
Subject: UBI: add ubi_leb_map interface

The idea of this interface belongs to Adrian Hunter. The
interface is extremely useful when one has to have a guarantee
that an LEB will contain all 0xFFs even in case of an unclean
reboot. UBI does have an 'ubi_leb_erase()' call which may do
this, but it is stupid and ineffecient, because it flushes whole
queue. I should be re-worked to just be a pair of unmap,
map calls.

The user of the interfaci is UBIFS at the moment.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/eba.c   | 13 ++++++++-----
 drivers/mtd/ubi/kapi.c  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/ubi.h |  1 +
 3 files changed, 54 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 85f50c83cf42..c87db07bcd0c 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -656,11 +656,14 @@ retry:
 		goto write_error;
 	}
 
-	err = ubi_io_write_data(ubi, buf, pnum, offset, len);
-	if (err) {
-		ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, "
-			 "PEB %d", len, offset, vol_id, lnum, pnum);
-		goto write_error;
+	if (len) {
+		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
+		if (err) {
+			ubi_warn("failed to write %d bytes at offset %d of "
+				 "LEB %d:%d, PEB %d", len, offset, vol_id,
+				 lnum, pnum);
+			goto write_error;
+		}
 	}
 
 	vol->eba_tbl[lnum] = pnum;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 03c774f41549..e1ef802a03a7 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -546,6 +546,51 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum)
 }
 EXPORT_SYMBOL_GPL(ubi_leb_unmap);
 
+/**
+ * ubi_leb_map - map logical erasblock to a physical eraseblock.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ * @dtype: expected data type
+ *
+ * This function maps an un-mapped logical eraseblock @lnum to a physical
+ * eraseblock. This means, that after a successfull invocation of this
+ * function the logical eraseblock @lnum will be empty (contain only %0xFF
+ * bytes) and be mapped to a physical eraseblock, even if an unclean reboot
+ * happens.
+ *
+ * This function returns zero in case of success, %-EBADF if the volume is
+ * damaged because of an interrupted update, %-EBADMSG if the logical
+ * eraseblock is already mapped, and other negative error codes in case of
+ * other failures.
+ */
+int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("unmap LEB %d:%d", vol_id, lnum);
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs)
+		return -EINVAL;
+
+	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
+	    dtype != UBI_UNKNOWN)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	if (vol->eba_tbl[lnum] >= 0)
+		return -EBADMSG;
+
+	return ubi_eba_write_leb(ubi, vol_id, lnum, NULL, 0, 0, dtype);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_map);
+
 /**
  * ubi_is_mapped - check if logical eraseblock is mapped.
  * @desc: volume descriptor
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index 3d967b6b120a..c4abe0351225 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -167,6 +167,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
 		   int len, int dtype);
 int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum);
 int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum);
+int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
 int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum);
 
 /*
-- 
cgit 


From de7921f01a407e0cb38143363995db89a5f9a5c5 Mon Sep 17 00:00:00 2001
From: Bartlomiej Sieka <tur@semihalf.com>
Date: Mon, 26 Nov 2007 18:55:18 +0100
Subject: [MTD] [NOR] Fix incorrect interface code for x16/x32 chips

According to "Common Flash Memory Interface Publication 100" dated December 1,
2001, the interface code for x16/x32 chips is 0x0005, and not 0x0004 used so
far.

Signed-off-by: Bartlomiej Sieka <tur@semihalf.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c |  6 ++++--
 drivers/mtd/chips/cfi_probe.c       | 12 ++++++------
 drivers/mtd/maps/scb2_flash.c       |  2 +-
 include/linux/mtd/cfi.h             | 12 ++++++++++++
 4 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 571226eefeb8..796bfeadea21 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -342,10 +342,12 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 		/* Modify the unlock address if we are in compatibility mode */
 		if (	/* x16 in x8 mode */
 			((cfi->device_type == CFI_DEVICETYPE_X8) &&
-				(cfi->cfiq->InterfaceDesc == 2)) ||
+				(cfi->cfiq->InterfaceDesc ==
+					CFI_INTERFACE_X8_BY_X16_ASYNC)) ||
 			/* x32 in x16 mode */
 			((cfi->device_type == CFI_DEVICETYPE_X16) &&
-				(cfi->cfiq->InterfaceDesc == 4)))
+				(cfi->cfiq->InterfaceDesc ==
+					CFI_INTERFACE_X16_BY_X32_ASYNC)))
 		{
 			cfi->addr_unlock1 = 0xaaa;
 			cfi->addr_unlock2 = 0x555;
diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c
index 60e11a0ada97..f651b6ef1c5d 100644
--- a/drivers/mtd/chips/cfi_probe.c
+++ b/drivers/mtd/chips/cfi_probe.c
@@ -370,27 +370,27 @@ static void print_cfi_ident(struct cfi_ident *cfip)
 	printk("Device size: 0x%X bytes (%d MiB)\n", 1 << cfip->DevSize, 1<< (cfip->DevSize - 20));
 	printk("Flash Device Interface description: 0x%4.4X\n", cfip->InterfaceDesc);
 	switch(cfip->InterfaceDesc) {
-	case 0:
+	case CFI_INTERFACE_X8_ASYNC:
 		printk("  - x8-only asynchronous interface\n");
 		break;
 
-	case 1:
+	case CFI_INTERFACE_X16_ASYNC:
 		printk("  - x16-only asynchronous interface\n");
 		break;
 
-	case 2:
+	case CFI_INTERFACE_X8_BY_X16_ASYNC:
 		printk("  - supports x8 and x16 via BYTE# with asynchronous interface\n");
 		break;
 
-	case 3:
+	case CFI_INTERFACE_X32_ASYNC:
 		printk("  - x32-only asynchronous interface\n");
 		break;
 
-	case 4:
+	case CFI_INTERFACE_X16_BY_X32_ASYNC:
 		printk("  - supports x16 and x32 via Word# with asynchronous interface\n");
 		break;
 
-	case 65535:
+	case CFI_INTERFACE_NOT_ALLOWED:
 		printk("  - Not Allowed / Reserved\n");
 		break;
 
diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c
index dcfb85840d1e..0fc5584324e3 100644
--- a/drivers/mtd/maps/scb2_flash.c
+++ b/drivers/mtd/maps/scb2_flash.c
@@ -79,7 +79,7 @@ scb2_fixup_mtd(struct mtd_info *mtd)
 	struct cfi_private *cfi = map->fldrv_priv;
 
 	/* barf if this doesn't look right */
-	if (cfi->cfiq->InterfaceDesc != 1) {
+	if (cfi->cfiq->InterfaceDesc != CFI_INTERFACE_X16_ASYNC) {
 		printk(KERN_ERR MODNAME ": unsupported InterfaceDesc: %#x\n",
 		    cfi->cfiq->InterfaceDesc);
 		return -1;
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index e17c5343cf51..b0ddf4b25862 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -98,6 +98,18 @@ static inline int cfi_interleave_supported(int i)
 #define CFI_DEVICETYPE_X32 (32 / 8)
 #define CFI_DEVICETYPE_X64 (64 / 8)
 
+
+/* Device Interface Code Assignments from the "Common Flash Memory Interface
+ * Publication 100" dated December 1, 2001.
+ */
+#define CFI_INTERFACE_X8_ASYNC		0x0000
+#define CFI_INTERFACE_X16_ASYNC		0x0001
+#define CFI_INTERFACE_X8_BY_X16_ASYNC	0x0002
+#define CFI_INTERFACE_X32_ASYNC		0x0003
+#define CFI_INTERFACE_X16_BY_X32_ASYNC	0x0005
+#define CFI_INTERFACE_NOT_ALLOWED	0xffff
+
+
 /* NB: We keep these structures in memory in HOST byteorder, except
  * where individually noted.
  */
-- 
cgit 


From f4f37c8ec7d2491c8885c890ba74254b9adfbeee Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sun, 4 Nov 2007 00:41:12 -0400
Subject: Input: Add proper locking when changing device's keymap

Take dev->event_lock to make sure that we don't race with input_event() and
also force key up event when removing a key from keymap table.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/char/keyboard.c |  4 +--
 drivers/input/evdev.c   |  6 ++--
 drivers/input/input.c   | 78 +++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/input.h   |  3 ++
 4 files changed, 80 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index fc54d234507a..5218d0d0511e 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -194,7 +194,7 @@ int getkeycode(unsigned int scancode)
 	int error = -ENODEV;
 
 	list_for_each_entry(handle, &kbd_handler.h_list, h_node) {
-		error = handle->dev->getkeycode(handle->dev, scancode, &keycode);
+		error = input_get_keycode(handle->dev, scancode, &keycode);
 		if (!error)
 			return keycode;
 	}
@@ -208,7 +208,7 @@ int setkeycode(unsigned int scancode, unsigned int keycode)
 	int error = -ENODEV;
 
 	list_for_each_entry(handle, &kbd_handler.h_list, h_node) {
-		error = handle->dev->setkeycode(handle->dev, scancode, keycode);
+		error = input_set_keycode(handle->dev, scancode, keycode);
 		if (!error)
 			break;
 	}
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index e5b4e9bfbdc5..0727b0a12557 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -617,7 +617,7 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 		if (get_user(t, ip))
 			return -EFAULT;
 
-		error = dev->getkeycode(dev, t, &v);
+		error = input_get_keycode(dev, t, &v);
 		if (error)
 			return error;
 
@@ -630,7 +630,7 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 		if (get_user(t, ip) || get_user(v, ip + 1))
 			return -EFAULT;
 
-		return dev->setkeycode(dev, t, v);
+		return input_set_keycode(dev, t, v);
 
 	case EVIOCSFF:
 		if (copy_from_user(&effect, p, sizeof(effect)))
@@ -683,7 +683,7 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 				case EV_FF:  bits = dev->ffbit;  len = FF_MAX;  break;
 				case EV_SW:  bits = dev->swbit;  len = SW_MAX;  break;
 				default: return -EINVAL;
-			}
+				}
 				return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode);
 			}
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index a0be978501ff..e1729e1dd9b2 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -493,7 +493,7 @@ static void input_disconnect_device(struct input_dev *dev)
 	if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) {
 		for (code = 0; code <= KEY_MAX; code++) {
 			if (is_event_supported(code, dev->keybit, KEY_MAX) &&
-			    test_bit(code, dev->key)) {
+			    __test_and_clear_bit(code, dev->key)) {
 				input_pass_event(dev, EV_KEY, code, 0);
 			}
 		}
@@ -526,7 +526,7 @@ static int input_default_getkeycode(struct input_dev *dev,
 	if (!dev->keycodesize)
 		return -EINVAL;
 
-	if (scancode < 0 || scancode >= dev->keycodemax)
+	if (scancode >= dev->keycodemax)
 		return -EINVAL;
 
 	*keycode = input_fetch_keycode(dev, scancode);
@@ -540,10 +540,7 @@ static int input_default_setkeycode(struct input_dev *dev,
 	int old_keycode;
 	int i;
 
-	if (scancode < 0 || scancode >= dev->keycodemax)
-		return -EINVAL;
-
-	if (keycode < 0 || keycode > KEY_MAX)
+	if (scancode >= dev->keycodemax)
 		return -EINVAL;
 
 	if (!dev->keycodesize)
@@ -586,6 +583,75 @@ static int input_default_setkeycode(struct input_dev *dev,
 	return 0;
 }
 
+/**
+ * input_get_keycode - retrieve keycode currently mapped to a given scancode
+ * @dev: input device which keymap is being queried
+ * @scancode: scancode (or its equivalent for device in question) for which
+ *	keycode is needed
+ * @keycode: result
+ *
+ * This function should be called by anyone interested in retrieving current
+ * keymap. Presently keyboard and evdev handlers use it.
+ */
+int input_get_keycode(struct input_dev *dev, int scancode, int *keycode)
+{
+	if (scancode < 0)
+		return -EINVAL;
+
+	return dev->getkeycode(dev, scancode, keycode);
+}
+EXPORT_SYMBOL(input_get_keycode);
+
+/**
+ * input_get_keycode - assign new keycode to a given scancode
+ * @dev: input device which keymap is being updated
+ * @scancode: scancode (or its equivalent for device in question)
+ * @keycode: new keycode to be assigned to the scancode
+ *
+ * This function should be called by anyone needing to update current
+ * keymap. Presently keyboard and evdev handlers use it.
+ */
+int input_set_keycode(struct input_dev *dev, int scancode, int keycode)
+{
+	unsigned long flags;
+	int old_keycode;
+	int retval;
+
+	if (scancode < 0)
+		return -EINVAL;
+
+	if (keycode < 0 || keycode > KEY_MAX)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+
+	retval = dev->getkeycode(dev, scancode, &old_keycode);
+	if (retval)
+		goto out;
+
+	retval = dev->setkeycode(dev, scancode, keycode);
+	if (retval)
+		goto out;
+
+	/*
+	 * Simulate keyup event if keycode is not present
+	 * in the keymap anymore
+	 */
+	if (test_bit(EV_KEY, dev->evbit) &&
+	    !is_event_supported(old_keycode, dev->keybit, KEY_MAX) &&
+	    __test_and_clear_bit(old_keycode, dev->key)) {
+
+		input_pass_event(dev, EV_KEY, old_keycode, 0);
+		if (dev->sync)
+			input_pass_event(dev, EV_SYN, SYN_REPORT, 1);
+	}
+
+ out:
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+
+	return retval;
+}
+EXPORT_SYMBOL(input_set_keycode);
 
 #define MATCH_BIT(bit, max) \
 		for (i = 0; i < BITS_TO_LONGS(max); i++) \
diff --git a/include/linux/input.h b/include/linux/input.h
index 2075d6da2a31..9a963fe97300 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1309,6 +1309,9 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min
 	dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis);
 }
 
+int input_get_keycode(struct input_dev *dev, int scancode, int *keycode);
+int input_set_keycode(struct input_dev *dev, int scancode, int keycode);
+
 extern struct class input_class;
 
 /**
-- 
cgit 


From 0c1efd365306c9b04df5abdd41e9b4dc721e84fb Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 21 Jan 2008 01:08:24 -0500
Subject: Input: remove cdev from input_dev structure

Cdev field was obsolete and provided only for backward compatibility
since conversion of input core from class devices to regular devices.
It is time to remove it.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 3 ---
 include/linux/input.h | 4 ----
 2 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index e1729e1dd9b2..6ee8af8963f9 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1395,9 +1395,6 @@ int input_register_device(struct input_dev *dev)
 	snprintf(dev->dev.bus_id, sizeof(dev->dev.bus_id),
 		 "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1);
 
-	if (dev->cdev.dev)
-		dev->dev.parent = dev->cdev.dev;
-
 	error = device_add(&dev->dev);
 	if (error)
 		return error;
diff --git a/include/linux/input.h b/include/linux/input.h
index 9a963fe97300..48937ffa977a 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1018,7 +1018,6 @@ struct ff_effect {
  * @going_away: marks devices that are in a middle of unregistering and
  *	causes input_open_device*() fail with -ENODEV.
  * @dev: driver model's view of this device
- * @cdev: union for struct device pointer
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
@@ -1083,9 +1082,6 @@ struct input_dev {
 	int going_away;
 
 	struct device dev;
-	union {			/* temporarily so while we switching to struct device */
-		struct device *dev;
-	} cdev;
 
 	struct list_head	h_list;
 	struct list_head	node;
-- 
cgit 


From 3eb8749a37990b505ab94466038c067444bbd7eb Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Mon, 14 Jan 2008 18:05:45 +0900
Subject: sony-laptop: add Type4 model

Recent Vaio models (UX, SZ and presumably TZ and others) add more
events and a slightly different handling of Fn key events for
additional hotkeys (s1, s2, zoom-in/out, etc.).

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/misc/sony-laptop.c | 295 ++++++++++++++++++++++++++++-----------------
 include/linux/sonypi.h     |   2 +
 2 files changed, 186 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/sony-laptop.c b/drivers/misc/sony-laptop.c
index 98692862fbc3..70d5cc5969aa 100644
--- a/drivers/misc/sony-laptop.c
+++ b/drivers/misc/sony-laptop.c
@@ -146,68 +146,70 @@ struct sony_laptop_keypress {
  * and input layer indexes in the keymap
  */
 static int sony_laptop_input_index[] = {
-	-1,	/* no event */
-	-1,	/* SONYPI_EVENT_JOGDIAL_DOWN */
-	-1,	/* SONYPI_EVENT_JOGDIAL_UP */
-	-1,	/* SONYPI_EVENT_JOGDIAL_DOWN_PRESSED */
-	-1,	/* SONYPI_EVENT_JOGDIAL_UP_PRESSED */
-	-1,	/* SONYPI_EVENT_JOGDIAL_PRESSED */
-	-1,	/* SONYPI_EVENT_JOGDIAL_RELEASED */
-	 0,	/* SONYPI_EVENT_CAPTURE_PRESSED */
-	 1,	/* SONYPI_EVENT_CAPTURE_RELEASED */
-	 2,	/* SONYPI_EVENT_CAPTURE_PARTIALPRESSED */
-	 3,	/* SONYPI_EVENT_CAPTURE_PARTIALRELEASED */
-	 4,	/* SONYPI_EVENT_FNKEY_ESC */
-	 5,	/* SONYPI_EVENT_FNKEY_F1 */
-	 6,	/* SONYPI_EVENT_FNKEY_F2 */
-	 7,	/* SONYPI_EVENT_FNKEY_F3 */
-	 8,	/* SONYPI_EVENT_FNKEY_F4 */
-	 9,	/* SONYPI_EVENT_FNKEY_F5 */
-	10,	/* SONYPI_EVENT_FNKEY_F6 */
-	11,	/* SONYPI_EVENT_FNKEY_F7 */
-	12,	/* SONYPI_EVENT_FNKEY_F8 */
-	13,	/* SONYPI_EVENT_FNKEY_F9 */
-	14,	/* SONYPI_EVENT_FNKEY_F10 */
-	15,	/* SONYPI_EVENT_FNKEY_F11 */
-	16,	/* SONYPI_EVENT_FNKEY_F12 */
-	17,	/* SONYPI_EVENT_FNKEY_1 */
-	18,	/* SONYPI_EVENT_FNKEY_2 */
-	19,	/* SONYPI_EVENT_FNKEY_D */
-	20,	/* SONYPI_EVENT_FNKEY_E */
-	21,	/* SONYPI_EVENT_FNKEY_F */
-	22,	/* SONYPI_EVENT_FNKEY_S */
-	23,	/* SONYPI_EVENT_FNKEY_B */
-	24,	/* SONYPI_EVENT_BLUETOOTH_PRESSED */
-	25,	/* SONYPI_EVENT_PKEY_P1 */
-	26,	/* SONYPI_EVENT_PKEY_P2 */
-	27,	/* SONYPI_EVENT_PKEY_P3 */
-	28,	/* SONYPI_EVENT_BACK_PRESSED */
-	-1,	/* SONYPI_EVENT_LID_CLOSED */
-	-1,	/* SONYPI_EVENT_LID_OPENED */
-	29,	/* SONYPI_EVENT_BLUETOOTH_ON */
-	30,	/* SONYPI_EVENT_BLUETOOTH_OFF */
-	31,	/* SONYPI_EVENT_HELP_PRESSED */
-	32,	/* SONYPI_EVENT_FNKEY_ONLY */
-	33,	/* SONYPI_EVENT_JOGDIAL_FAST_DOWN */
-	34,	/* SONYPI_EVENT_JOGDIAL_FAST_UP */
-	35,	/* SONYPI_EVENT_JOGDIAL_FAST_DOWN_PRESSED */
-	36,	/* SONYPI_EVENT_JOGDIAL_FAST_UP_PRESSED */
-	37,	/* SONYPI_EVENT_JOGDIAL_VFAST_DOWN */
-	38,	/* SONYPI_EVENT_JOGDIAL_VFAST_UP */
-	39,	/* SONYPI_EVENT_JOGDIAL_VFAST_DOWN_PRESSED */
-	40,	/* SONYPI_EVENT_JOGDIAL_VFAST_UP_PRESSED */
-	41,	/* SONYPI_EVENT_ZOOM_PRESSED */
-	42,	/* SONYPI_EVENT_THUMBPHRASE_PRESSED */
-	43,	/* SONYPI_EVENT_MEYE_FACE */
-	44,	/* SONYPI_EVENT_MEYE_OPPOSITE */
-	45,	/* SONYPI_EVENT_MEMORYSTICK_INSERT */
-	46,	/* SONYPI_EVENT_MEMORYSTICK_EJECT */
-	-1,	/* SONYPI_EVENT_ANYBUTTON_RELEASED */
-	-1,	/* SONYPI_EVENT_BATTERY_INSERT */
-	-1,	/* SONYPI_EVENT_BATTERY_REMOVE */
-	-1,	/* SONYPI_EVENT_FNKEY_RELEASED */
-	47,	/* SONYPI_EVENT_WIRELESS_ON */
-	48,	/* SONYPI_EVENT_WIRELESS_OFF */
+	-1,	/*  0 no event */
+	-1,	/*  1 SONYPI_EVENT_JOGDIAL_DOWN */
+	-1,	/*  2 SONYPI_EVENT_JOGDIAL_UP */
+	-1,	/*  3 SONYPI_EVENT_JOGDIAL_DOWN_PRESSED */
+	-1,	/*  4 SONYPI_EVENT_JOGDIAL_UP_PRESSED */
+	-1,	/*  5 SONYPI_EVENT_JOGDIAL_PRESSED */
+	-1,	/*  6 SONYPI_EVENT_JOGDIAL_RELEASED */
+	 0,	/*  7 SONYPI_EVENT_CAPTURE_PRESSED */
+	 1,	/*  8 SONYPI_EVENT_CAPTURE_RELEASED */
+	 2,	/*  9 SONYPI_EVENT_CAPTURE_PARTIALPRESSED */
+	 3,	/* 10 SONYPI_EVENT_CAPTURE_PARTIALRELEASED */
+	 4,	/* 11 SONYPI_EVENT_FNKEY_ESC */
+	 5,	/* 12 SONYPI_EVENT_FNKEY_F1 */
+	 6,	/* 13 SONYPI_EVENT_FNKEY_F2 */
+	 7,	/* 14 SONYPI_EVENT_FNKEY_F3 */
+	 8,	/* 15 SONYPI_EVENT_FNKEY_F4 */
+	 9,	/* 16 SONYPI_EVENT_FNKEY_F5 */
+	10,	/* 17 SONYPI_EVENT_FNKEY_F6 */
+	11,	/* 18 SONYPI_EVENT_FNKEY_F7 */
+	12,	/* 19 SONYPI_EVENT_FNKEY_F8 */
+	13,	/* 20 SONYPI_EVENT_FNKEY_F9 */
+	14,	/* 21 SONYPI_EVENT_FNKEY_F10 */
+	15,	/* 22 SONYPI_EVENT_FNKEY_F11 */
+	16,	/* 23 SONYPI_EVENT_FNKEY_F12 */
+	17,	/* 24 SONYPI_EVENT_FNKEY_1 */
+	18,	/* 25 SONYPI_EVENT_FNKEY_2 */
+	19,	/* 26 SONYPI_EVENT_FNKEY_D */
+	20,	/* 27 SONYPI_EVENT_FNKEY_E */
+	21,	/* 28 SONYPI_EVENT_FNKEY_F */
+	22,	/* 29 SONYPI_EVENT_FNKEY_S */
+	23,	/* 30 SONYPI_EVENT_FNKEY_B */
+	24,	/* 31 SONYPI_EVENT_BLUETOOTH_PRESSED */
+	25,	/* 32 SONYPI_EVENT_PKEY_P1 */
+	26,	/* 33 SONYPI_EVENT_PKEY_P2 */
+	27,	/* 34 SONYPI_EVENT_PKEY_P3 */
+	28,	/* 35 SONYPI_EVENT_BACK_PRESSED */
+	-1,	/* 36 SONYPI_EVENT_LID_CLOSED */
+	-1,	/* 37 SONYPI_EVENT_LID_OPENED */
+	29,	/* 38 SONYPI_EVENT_BLUETOOTH_ON */
+	30,	/* 39 SONYPI_EVENT_BLUETOOTH_OFF */
+	31,	/* 40 SONYPI_EVENT_HELP_PRESSED */
+	32,	/* 41 SONYPI_EVENT_FNKEY_ONLY */
+	33,	/* 42 SONYPI_EVENT_JOGDIAL_FAST_DOWN */
+	34,	/* 43 SONYPI_EVENT_JOGDIAL_FAST_UP */
+	35,	/* 44 SONYPI_EVENT_JOGDIAL_FAST_DOWN_PRESSED */
+	36,	/* 45 SONYPI_EVENT_JOGDIAL_FAST_UP_PRESSED */
+	37,	/* 46 SONYPI_EVENT_JOGDIAL_VFAST_DOWN */
+	38,	/* 47 SONYPI_EVENT_JOGDIAL_VFAST_UP */
+	39,	/* 48 SONYPI_EVENT_JOGDIAL_VFAST_DOWN_PRESSED */
+	40,	/* 49 SONYPI_EVENT_JOGDIAL_VFAST_UP_PRESSED */
+	41,	/* 50 SONYPI_EVENT_ZOOM_PRESSED */
+	42,	/* 51 SONYPI_EVENT_THUMBPHRASE_PRESSED */
+	43,	/* 52 SONYPI_EVENT_MEYE_FACE */
+	44,	/* 53 SONYPI_EVENT_MEYE_OPPOSITE */
+	45,	/* 54 SONYPI_EVENT_MEMORYSTICK_INSERT */
+	46,	/* 55 SONYPI_EVENT_MEMORYSTICK_EJECT */
+	-1,	/* 56 SONYPI_EVENT_ANYBUTTON_RELEASED */
+	-1,	/* 57 SONYPI_EVENT_BATTERY_INSERT */
+	-1,	/* 58 SONYPI_EVENT_BATTERY_REMOVE */
+	-1,	/* 59 SONYPI_EVENT_FNKEY_RELEASED */
+	47,	/* 60 SONYPI_EVENT_WIRELESS_ON */
+	48,	/* 61 SONYPI_EVENT_WIRELESS_OFF */
+	49,	/* 62 SONYPI_EVENT_ZOOM_IN_PRESSED */
+	50,	/* 63 SONYPI_EVENT_ZOOM_OUT_PRESSED */
 };
 
 static int sony_laptop_input_keycode_map[] = {
@@ -260,6 +262,8 @@ static int sony_laptop_input_keycode_map[] = {
 	KEY_RESERVED,	/* 46 SONYPI_EVENT_MEMORYSTICK_EJECT */
 	KEY_WLAN,	/* 47 SONYPI_EVENT_WIRELESS_ON */
 	KEY_WLAN,	/* 48 SONYPI_EVENT_WIRELESS_OFF */
+	KEY_ZOOMIN,	/* 49 SONYPI_EVENT_ZOOM_IN_PRESSED */
+	KEY_ZOOMOUT	/* 50 SONYPI_EVENT_ZOOM_OUT_PRESSED */
 };
 
 /* release buttons after a short delay if pressed */
@@ -1178,10 +1182,12 @@ static struct acpi_driver sony_nc_driver = {
 #define SONYPI_DEVICE_TYPE1	0x00000001
 #define SONYPI_DEVICE_TYPE2	0x00000002
 #define SONYPI_DEVICE_TYPE3	0x00000004
+#define SONYPI_DEVICE_TYPE4	0x00000008
 
 #define SONYPI_TYPE1_OFFSET	0x04
 #define SONYPI_TYPE2_OFFSET	0x12
 #define SONYPI_TYPE3_OFFSET	0x12
+#define SONYPI_TYPE4_OFFSET	0x12
 
 struct sony_pic_ioport {
 	struct acpi_resource_io	io1;
@@ -1202,7 +1208,7 @@ struct sonypi_eventtypes {
 
 struct device_ctrl {
 	int				model;
-	int				(*handle_irq)(void);
+	int				(*handle_irq)(const u8, const u8);
 	u16				evport_offset;
 	u8				has_camera;
 	u8				has_bluetooth;
@@ -1277,6 +1283,7 @@ static struct sonypi_event sonypi_joggerev[] = {
 static struct sonypi_event sonypi_captureev[] = {
 	{ 0x05, SONYPI_EVENT_CAPTURE_PARTIALPRESSED },
 	{ 0x07, SONYPI_EVENT_CAPTURE_PRESSED },
+	{ 0x40, SONYPI_EVENT_CAPTURE_PRESSED },
 	{ 0x01, SONYPI_EVENT_CAPTURE_PARTIALRELEASED },
 	{ 0, 0 }
 };
@@ -1313,7 +1320,6 @@ static struct sonypi_event sonypi_pkeyev[] = {
 	{ 0x01, SONYPI_EVENT_PKEY_P1 },
 	{ 0x02, SONYPI_EVENT_PKEY_P2 },
 	{ 0x04, SONYPI_EVENT_PKEY_P3 },
-	{ 0x5c, SONYPI_EVENT_PKEY_P1 },
 	{ 0, 0 }
 };
 
@@ -1355,6 +1361,8 @@ static struct sonypi_event sonypi_lidev[] = {
 /* The set of possible zoom events */
 static struct sonypi_event sonypi_zoomev[] = {
 	{ 0x39, SONYPI_EVENT_ZOOM_PRESSED },
+	{ 0x10, SONYPI_EVENT_ZOOM_IN_PRESSED },
+	{ 0x20, SONYPI_EVENT_ZOOM_OUT_PRESSED },
 	{ 0, 0 }
 };
 
@@ -1424,55 +1432,19 @@ static struct sonypi_eventtypes type3_events[] = {
 	{ 0x31, SONYPI_PKEY_MASK, sonypi_pkeyev },
 	{ 0 },
 };
-
-static struct device_ctrl spic_types[] = {
-	{
-		.model = SONYPI_DEVICE_TYPE1,
-		.handle_irq = NULL,
-		.evport_offset = SONYPI_TYPE1_OFFSET,
-		.event_types = type1_events,
-	},
-	{
-		.model = SONYPI_DEVICE_TYPE2,
-		.handle_irq = NULL,
-		.evport_offset = SONYPI_TYPE2_OFFSET,
-		.event_types = type2_events,
-	},
-	{
-		.model = SONYPI_DEVICE_TYPE3,
-		.handle_irq = NULL,
-		.evport_offset = SONYPI_TYPE3_OFFSET,
-		.event_types = type3_events,
-	},
+static struct sonypi_eventtypes type4_events[] = {
+	{ 0, 0xffffffff, sonypi_releaseev },
+	{ 0x21, SONYPI_FNKEY_MASK, sonypi_fnkeyev },
+	{ 0x31, SONYPI_WIRELESS_MASK, sonypi_wlessev },
+	{ 0x31, SONYPI_MEMORYSTICK_MASK, sonypi_memorystickev },
+	{ 0x41, SONYPI_BATTERY_MASK, sonypi_batteryev },
+	{ 0x05, SONYPI_PKEY_MASK, sonypi_pkeyev },
+	{ 0x05, SONYPI_ZOOM_MASK, sonypi_zoomev },
+	{ 0x05, SONYPI_CAPTURE_MASK, sonypi_captureev },
+	{ 0 },
 };
 
-static void sony_pic_detect_device_type(struct sony_pic_dev *dev)
-{
-	struct pci_dev *pcidev;
-
-	if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
-				     PCI_DEVICE_ID_INTEL_82371AB_3, NULL)))
-		dev->control = &spic_types[0];
-
-	else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
-					  PCI_DEVICE_ID_INTEL_ICH6_1, NULL)))
-		dev->control = &spic_types[2];
-
-	else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
-					  PCI_DEVICE_ID_INTEL_ICH7_1, NULL)))
-		dev->control = &spic_types[2];
-
-	else
-		dev->control = &spic_types[1];
-
-	if (pcidev)
-		pci_dev_put(pcidev);
-
-	printk(KERN_INFO DRV_PFX "detected Type%d model\n",
-			dev->control->model == SONYPI_DEVICE_TYPE1 ? 1 :
-			dev->control->model == SONYPI_DEVICE_TYPE2 ? 2 : 3);
-}
-
+/* low level spic calls */
 #define ITERATIONS_LONG		10000
 #define ITERATIONS_SHORT	10
 #define wait_on_command(command, iterations) {				\
@@ -1528,6 +1500,100 @@ static u8 sony_pic_call3(u8 dev, u8 fn, u8 v)
 	return v1;
 }
 
+/*
+ * minidrivers for SPIC models
+ */
+static int type4_handle_irq(const u8 data_mask, const u8 ev)
+{
+	/*
+	 * 0x31 could mean we have to take some extra action and wait for
+	 * the next irq for some Type4 models, it will generate a new
+	 * irq and we can read new data from the device:
+	 *  - 0x5c and 0x5f requires 0xA0
+	 *  - 0x61 requires 0xB3
+	 */
+	if (data_mask == 0x31) {
+		if (ev == 0x5c || ev == 0x5f)
+			sony_pic_call1(0xA0);
+		else if (ev == 0x61)
+			sony_pic_call1(0xB3);
+		return 0;
+	}
+	return 1;
+}
+
+static struct device_ctrl spic_types[] = {
+	{
+		.model = SONYPI_DEVICE_TYPE1,
+		.handle_irq = NULL,
+		.evport_offset = SONYPI_TYPE1_OFFSET,
+		.event_types = type1_events,
+	},
+	{
+		.model = SONYPI_DEVICE_TYPE2,
+		.handle_irq = NULL,
+		.evport_offset = SONYPI_TYPE2_OFFSET,
+		.event_types = type2_events,
+	},
+	{
+		.model = SONYPI_DEVICE_TYPE3,
+		.handle_irq = NULL,
+		.evport_offset = SONYPI_TYPE3_OFFSET,
+		.event_types = type3_events,
+	},
+	{
+		.model = SONYPI_DEVICE_TYPE4,
+		.handle_irq = type4_handle_irq,
+		.evport_offset = SONYPI_TYPE4_OFFSET,
+		.event_types = type4_events,
+	},
+};
+
+static void sony_pic_detect_device_type(struct sony_pic_dev *dev)
+{
+	struct pci_dev *pcidev;
+
+	pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
+			PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
+	if (pcidev) {
+		dev->control = &spic_types[0];
+		goto out;
+	}
+
+	pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
+			PCI_DEVICE_ID_INTEL_ICH6_1, NULL);
+	if (pcidev) {
+		dev->control = &spic_types[2];
+		goto out;
+	}
+
+	pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
+			PCI_DEVICE_ID_INTEL_ICH7_1, NULL);
+	if (pcidev) {
+		dev->control = &spic_types[3];
+		goto out;
+	}
+
+	pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
+			PCI_DEVICE_ID_INTEL_ICH8_4, NULL);
+	if (pcidev) {
+		dev->control = &spic_types[3];
+		goto out;
+	}
+
+	/* default */
+	dev->control = &spic_types[1];
+
+out:
+	if (pcidev)
+		pci_dev_put(pcidev);
+
+	printk(KERN_INFO DRV_PFX "detected Type%d model\n",
+			dev->control->model == SONYPI_DEVICE_TYPE1 ? 1 :
+			dev->control->model == SONYPI_DEVICE_TYPE2 ? 2 :
+			dev->control->model == SONYPI_DEVICE_TYPE3 ? 3 : 4);
+}
+
 /* camera tests and poweron/poweroff */
 #define SONYPI_CAMERA_PICTURE		5
 #define SONYPI_CAMERA_CONTROL		0x10
@@ -2406,6 +2472,13 @@ static irqreturn_t sony_pic_irq(int irq, void *dev_id)
 			}
 		}
 	}
+	/* Still not able to decode the event try to pass
+	 * it over to the minidriver
+	 */
+	if (dev->control->handle_irq &&
+			dev->control->handle_irq(data_mask, ev) == 0)
+		return IRQ_HANDLED;
+
 	dprintk("unknown event ([%.2x] [%.2x]) at port 0x%.4x(+0x%.2x)\n",
 			ev, data_mask, dev->cur_ioport->io1.minimum,
 			dev->control->evport_offset);
diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h
index 40c7b5d993b9..f41ffd7c2dd9 100644
--- a/include/linux/sonypi.h
+++ b/include/linux/sonypi.h
@@ -101,6 +101,8 @@
 #define SONYPI_EVENT_FNKEY_RELEASED		59
 #define SONYPI_EVENT_WIRELESS_ON		60
 #define SONYPI_EVENT_WIRELESS_OFF		61
+#define SONYPI_EVENT_ZOOM_IN_PRESSED		62
+#define SONYPI_EVENT_ZOOM_OUT_PRESSED		63
 
 /* get/set brightness */
 #define SONYPI_IOCGBRT		_IOR('v', 0, __u8)
-- 
cgit 


From 866136827b9a71c39dcb06d23ce523f719eab74b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 24 Jan 2008 16:15:14 +0200
Subject: UBI: introduce atomic LEB change ioctl

We have to be able to change individual LEBs for utilities like
ubifsck, ubifstune. For example, ubifsck has to be able to fix
errors on the media, ubifstune has to be able to change the
the superblock, hence this ioctl.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 include/linux/mtd/ubi.h | 17 -----------------
 include/mtd/ubi-user.h  | 51 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index c4abe0351225..f71201d0f3e7 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -25,23 +25,6 @@
 #include <linux/types.h>
 #include <mtd/ubi-user.h>
 
-/*
- * UBI data type hint constants.
- *
- * UBI_LONGTERM: long-term data
- * UBI_SHORTTERM: short-term data
- * UBI_UNKNOWN: data persistence is unknown
- *
- * These constants are used when data is written to UBI volumes in order to
- * help the UBI wear-leveling unit to find more appropriate physical
- * eraseblocks.
- */
-enum {
-	UBI_LONGTERM = 1,
-	UBI_SHORTTERM,
-	UBI_UNKNOWN
-};
-
 /*
  * enum ubi_open_mode - UBI volume open mode constants.
  *
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h
index 4d184a7f80a8..a7421f130cc0 100644
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -63,7 +63,7 @@
  *
  * Volume update should be done via the %UBI_IOCVOLUP IOCTL command of the
  * corresponding UBI volume character device. A pointer to a 64-bit update
- * size should be passed to the IOCTL. After then, UBI expects user to write
+ * size should be passed to the IOCTL. After this, UBI expects user to write
  * this number of bytes to the volume character device. The update is finished
  * when the claimed number of bytes is passed. So, the volume update sequence
  * is something like:
@@ -72,6 +72,15 @@
  * ioctl(fd, UBI_IOCVOLUP, &image_size);
  * write(fd, buf, image_size);
  * close(fd);
+ *
+ * Atomic eraseblock change
+ * ~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Atomic eraseblock change operation is done via the %UBI_IOCEBCH IOCTL
+ * command of the corresponding UBI volume character device. A pointer to
+ * &struct ubi_leb_change_req has to be passed to the IOCTL. Then the user is
+ * expected to write the requested amount of bytes. This is similar to the
+ * "volume update" IOCTL.
  */
 
 /*
@@ -113,10 +122,29 @@
 #define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t)
 /* An eraseblock erasure command, used for debugging, disabled by default */
 #define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t)
+/* An atomic eraseblock change command */
+#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t)
 
 /* Maximum MTD device name length supported by UBI */
 #define MAX_UBI_MTD_NAME_LEN 127
 
+/*
+ * UBI data type hint constants.
+ *
+ * UBI_LONGTERM: long-term data
+ * UBI_SHORTTERM: short-term data
+ * UBI_UNKNOWN: data persistence is unknown
+ *
+ * These constants are used when data is written to UBI volumes in order to
+ * help the UBI wear-leveling unit to find more appropriate physical
+ * eraseblocks.
+ */
+enum {
+	UBI_LONGTERM  = 1,
+	UBI_SHORTTERM = 2,
+	UBI_UNKNOWN   = 3,
+};
+
 /*
  * UBI volume type constants.
  *
@@ -125,7 +153,7 @@
  */
 enum {
 	UBI_DYNAMIC_VOLUME = 3,
-	UBI_STATIC_VOLUME = 4,
+	UBI_STATIC_VOLUME  = 4,
 };
 
 /**
@@ -137,7 +165,7 @@ enum {
  *
  * This data structure is used to specify MTD device UBI has to attach and the
  * parameters it has to use. The number which should be assigned to the new UBI
- * device is passed in @ubi_num. UBI may automatically assing the number if
+ * device is passed in @ubi_num. UBI may automatically assign the number if
  * @UBI_DEV_NUM_AUTO is passed. In this case, the device number is returned in
  * @ubi_num.
  *
@@ -176,7 +204,7 @@ struct ubi_attach_req {
  * @padding2: reserved for future, not used, has to be zeroed
  * @name: volume name
  *
- * This structure is used by userspace programs when creating new volumes. The
+ * This structure is used by user-space programs when creating new volumes. The
  * @used_bytes field is only necessary when creating static volumes.
  *
  * The @alignment field specifies the required alignment of the volume logical
@@ -222,4 +250,19 @@ struct ubi_rsvol_req {
 	int32_t vol_id;
 } __attribute__ ((packed));
 
+/**
+ * struct ubi_leb_change_req - a data structure used in atomic logical
+ *                             eraseblock change requests.
+ * @lnum: logical eraseblock number to change
+ * @bytes: how many bytes will be written to the logical eraseblock
+ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
+ * @padding: reserved for future, not used, has to be zeroed
+ */
+struct ubi_leb_change_req {
+	int32_t lnum;
+	int32_t bytes;
+	uint8_t dtype;
+	uint8_t padding[7];
+} __attribute__ ((packed));
+
 #endif /* __UBI_USER_H__ */
-- 
cgit 


From e71f04fc9234b14636887ceb5862755f1690642c Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Tue, 11 Dec 2007 11:23:45 +0900
Subject: [MTD] [OneNAND] Get correct density from device ID

Use the higher bits for other purpose.

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 drivers/mtd/onenand/onenand_base.c | 20 ++++++++++++++++----
 include/linux/mtd/onenand_regs.h   |  1 +
 2 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index c79bc2ef3f50..cf8009329999 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -169,6 +169,18 @@ static int onenand_buffer_address(int dataram1, int sectors, int count)
 	return ((bsa << ONENAND_BSA_SHIFT) | bsc);
 }
 
+/**
+ * onenand_get_density - [DEFAULT] Get OneNAND density
+ * @param dev_id	OneNAND device ID
+ *
+ * Get OneNAND density from device ID
+ */
+static inline int onenand_get_density(int dev_id)
+{
+	int density = dev_id >> ONENAND_DEVICE_DENSITY_SHIFT;
+	return (density & ONENAND_DEVICE_DENSITY_MASK);
+}
+
 /**
  * onenand_command - [DEFAULT] Send command to OneNAND device
  * @param mtd		MTD device structure
@@ -2146,7 +2158,7 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
 
 	*retlen = 0;
 
-	density = this->device_id >> ONENAND_DEVICE_DENSITY_SHIFT;
+	density = onenand_get_density(this->device_id);
 	if (density < ONENAND_DEVICE_DENSITY_512Mb)
 		otp_pages = 20;
 	else
@@ -2337,7 +2349,7 @@ static void onenand_check_features(struct mtd_info *mtd)
 	unsigned int density, process;
 
 	/* Lock scheme depends on density and process */
-	density = this->device_id >> ONENAND_DEVICE_DENSITY_SHIFT;
+	density = onenand_get_density(this->device_id);
 	process = this->version_id >> ONENAND_VERSION_PROCESS_SHIFT;
 
 	/* Lock scheme */
@@ -2386,7 +2398,7 @@ static void onenand_print_device_info(int device, int version)
         vcc = device & ONENAND_DEVICE_VCC_MASK;
         demuxed = device & ONENAND_DEVICE_IS_DEMUX;
         ddp = device & ONENAND_DEVICE_IS_DDP;
-        density = device >> ONENAND_DEVICE_DENSITY_SHIFT;
+        density = onenand_get_density(device);
         printk(KERN_INFO "%sOneNAND%s %dMB %sV 16-bit (0x%02x)\n",
                 demuxed ? "" : "Muxed ",
                 ddp ? "(DDP)" : "",
@@ -2478,7 +2490,7 @@ static int onenand_probe(struct mtd_info *mtd)
 	this->device_id = dev_id;
 	this->version_id = ver_id;
 
-	density = dev_id >> ONENAND_DEVICE_DENSITY_SHIFT;
+	density = onenand_get_density(dev_id);
 	this->chipsize = (16 << density) << 20;
 	/* Set density mask. it is used for DDP */
 	if (ONENAND_IS_DDP(this))
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index c46161f4eee3..d1b310c92eb4 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -67,6 +67,7 @@
 /*
  * Device ID Register F001h (R)
  */
+#define ONENAND_DEVICE_DENSITY_MASK	(0xf)
 #define ONENAND_DEVICE_DENSITY_SHIFT	(4)
 #define ONENAND_DEVICE_IS_DDP		(1 << 3)
 #define ONENAND_DEVICE_IS_DEMUX		(1 << 2)
-- 
cgit 


From f757397097d0713c949af76dccabb65a2785782e Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@mvista.com>
Date: Thu, 31 Jan 2008 17:28:18 -0800
Subject: cpuidle: build fix for non-x86

Convert cpu_idle_wait() to cpuidle_kick_cpus() macro which is
SMP-only, and gives error on non supported CPU.

Signed-off-by: Kevin Hilman <khilman@mvista.com>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/Kconfig          |  3 +++
 drivers/cpuidle/cpuidle.c |  2 +-
 include/linux/cpuidle.h   | 13 +++++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 368864dfe6eb..37d1297e6787 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -105,6 +105,9 @@ config ARCH_HAS_ILOG2_U32
 config ARCH_HAS_ILOG2_U64
 	def_bool n
 
+config ARCH_HAS_CPU_IDLE_WAIT
+	def_bool y
+
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index d2fabe7863a9..794962d9f48b 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -82,7 +82,7 @@ void cpuidle_uninstall_idle_handler(void)
 {
 	if (enabled_devices && (pm_idle != pm_idle_old)) {
 		pm_idle = pm_idle_old;
-		cpu_idle_wait();
+		cpuidle_kick_cpus();
 	}
 }
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 16a51546db44..cb95f5a9075a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -72,6 +72,19 @@ cpuidle_set_statedata(struct cpuidle_state *state, void *data)
 	state->driver_data = data;
 }
 
+#ifdef CONFIG_SMP
+#ifdef CONFIG_ARCH_HAS_CPU_IDLE_WAIT
+static inline void cpuidle_kick_cpus(void)
+{
+	cpu_idle_wait();
+}
+#else /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT */
+#error "Arch needs cpu_idle_wait() equivalent here"
+#endif /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT */
+#else /* !CONFIG_SMP */
+static inline void cpuidle_kick_cpus(void) {}
+#endif /* !CONFIG_SMP */
+
 struct cpuidle_state_kobj {
 	struct cpuidle_state *state;
 	struct completion kobj_unregister;
-- 
cgit 


From 203d3d4aa482339b4816f131f713e1b8ee37f6dd Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Thu, 17 Jan 2008 15:51:08 +0800
Subject: the generic thermal sysfs driver

The Generic Thermal sysfs driver for thermal management.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Thomas Sujith <sujith.thomas@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/thermal/sysfs-api.txt | 246 +++++++++++++
 drivers/Kconfig                     |   2 +
 drivers/Makefile                    |   1 +
 drivers/thermal/Kconfig             |  15 +
 drivers/thermal/Makefile            |   5 +
 drivers/thermal/thermal.c           | 714 ++++++++++++++++++++++++++++++++++++
 include/linux/thermal.h             |  90 +++++
 7 files changed, 1073 insertions(+)
 create mode 100644 Documentation/thermal/sysfs-api.txt
 create mode 100644 drivers/thermal/Kconfig
 create mode 100644 drivers/thermal/Makefile
 create mode 100644 drivers/thermal/thermal.c
 create mode 100644 include/linux/thermal.h

(limited to 'include/linux')

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
new file mode 100644
index 000000000000..5776e090359d
--- /dev/null
+++ b/Documentation/thermal/sysfs-api.txt
@@ -0,0 +1,246 @@
+Generic Thermal Sysfs driver How To
+=========================
+
+Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com>
+
+Updated: 2 January 2008
+
+Copyright (c)  2008 Intel Corporation
+
+
+0. Introduction
+
+The generic thermal sysfs provides a set of interfaces for thermal zone devices (sensors)
+and thermal cooling devices (fan, processor...) to register with the thermal management
+solution and to be a part of it.
+
+This how-to focusses on enabling new thermal zone and cooling devices to participate
+in thermal management.
+This solution is platform independent and any type of thermal zone devices and
+cooling devices should be able to make use of the infrastructure.
+
+The main task of the thermal sysfs driver is to expose thermal zone attributes as well
+as cooling device attributes to the user space.
+An intelligent thermal management application can make decisions based on inputs
+from thermal zone attributes (the current temperature and trip point temperature)
+and throttle appropriate devices.
+
+[0-*]	denotes any positive number starting from 0
+[1-*]	denotes any positive number starting from 1
+
+1. thermal sysfs driver interface functions
+
+1.1 thermal zone device interface
+1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, int trips,
+				void *devdata, struct thermal_zone_device_ops *ops)
+
+	This interface function adds a new thermal zone device (sensor) to
+	/sys/class/thermal folder as thermal_zone[0-*].
+	It tries to bind all the thermal cooling devices registered at the same time.
+
+	name: the thermal zone name.
+	trips: the total number of trip points this thermal zone supports.
+	devdata: device private data
+	ops: thermal zone device callbacks.
+		.bind: bind the thermal zone device with a thermal cooling device.
+		.unbind: unbing the thermal zone device with a thermal cooling device.
+		.get_temp: get the current temperature of the thermal zone.
+		.get_mode: get the current mode (user/kernel) of the thermal zone.
+			   "kernel" means thermal management is done in kernel.
+			   "user" will prevent kernel thermal driver actions upon trip points
+			   so that user applications can take charge of thermal management.
+		.set_mode: set the mode (user/kernel) of the thermal zone.
+		.get_trip_type: get the type of certain trip point.
+		.get_trip_temp: get the temperature above which the certain trip point
+				will be fired.
+
+1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz)
+
+	This interface function removes the thermal zone device.
+	It deletes the corresponding entry form /sys/class/thermal folder and unbind all
+	the thermal cooling devices it uses.
+
+1.2 thermal cooling device interface
+1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name,
+					void *devdata, struct thermal_cooling_device_ops *)
+
+	This interface function adds a new thermal cooling device (fan/processor/...) to
+	/sys/class/thermal/ folder as cooling_device[0-*].
+	It tries to bind itself to all the thermal zone devices register at the same time.
+	name: the cooling device name.
+	devdata: device private data.
+	ops: thermal cooling devices callbacks.
+		.get_max_state: get the Maximum throttle state of the cooling device.
+		.get_cur_state: get the Current throttle state of the cooling device.
+		.set_cur_state: set the Current throttle state of the cooling device.
+
+1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
+
+	This interface function remove the thermal cooling device.
+	It deletes the corresponding entry form /sys/class/thermal folder and unbind
+	itself from all	the thermal zone devices using it.
+
+1.3 interface for binding a thermal zone device with a thermal cooling device
+1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+			int trip, struct thermal_cooling_device *cdev);
+
+	This interface function bind a thermal cooling device to the certain trip point
+	of a thermal zone device.
+	This function is usually called in the thermal zone device .bind callback.
+	tz: the thermal zone device
+	cdev: thermal cooling device
+	trip: indicates which trip point the cooling devices is associated with
+		 in this thermal zone.
+
+1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
+				int trip, struct thermal_cooling_device *cdev);
+
+	This interface function unbind a thermal cooling device from the certain trip point
+	of a thermal zone device.
+	This function is usually called in the thermal zone device .unbind callback.
+	tz: the thermal zone device
+	cdev: thermal cooling device
+	trip: indicates which trip point the cooling devices is associated with
+		in this thermal zone.
+
+2. sysfs attributes structure
+
+RO	read only value
+RW	read/write value
+
+All thermal sysfs attributes will be represented under /sys/class/thermal
+/sys/class/thermal/
+
+Thermal zone device sys I/F, created once it's registered:
+|thermal_zone[0-*]:
+	|-----type:			Type of the thermal zone
+	|-----temp:			Current temperature
+	|-----mode:			Working mode of the thermal zone
+	|-----trip_point_[0-*]_temp:	Trip point temperature
+	|-----trip_point_[0-*]_type:	Trip point type
+
+Thermal cooling device sys I/F, created once it's registered:
+|cooling_device[0-*]:
+	|-----type :			Type of the cooling device(processor/fan/...)
+	|-----max_state:		Maximum cooling state of the cooling device
+	|-----cur_state:		Current cooling state of the cooling device
+
+
+These two dynamic attributes are created/removed in pairs.
+They represent the relationship between a thermal zone and its associated cooling device.
+They are created/removed for each
+thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful exection.
+
+|thermal_zone[0-*]
+	|-----cdev[0-*]:		The [0-*]th cooling device in the current thermal zone
+	|-----cdev[0-*]_trip_point:	Trip point that cdev[0-*] is associated with
+
+
+***************************
+* Thermal zone attributes *
+***************************
+
+type				Strings which represent the thermal zone type.
+				This is given by thermal zone driver as part of registration.
+				Eg: "ACPI thermal zone" indicates it's a ACPI thermal device
+				RO
+				Optional
+
+temp				Current temperature as reported by thermal zone (sensor)
+				Unit: degree celsius
+				RO
+				Required
+
+mode				One of the predifned values in [kernel, user]
+				This file gives information about the algorithm
+				that is currently managing the thermal zone.
+				It can be either default kernel based algorithm
+				or user space application.
+				RW
+				Optional
+				kernel	= Thermal management in kernel thermal zone driver.
+				user	= Preventing kernel thermal zone driver actions upon
+					  trip points so that user application can take full
+					  charge of the thermal management.
+
+trip_point_[0-*]_temp		The temperature above which trip point will be fired
+				Unit: degree celsius
+				RO
+				Optional
+
+trip_point_[0-*]_type 		Strings which indicate the type of the trip point
+				Eg. it can be one of critical, hot, passive,
+				    active[0-*] for ACPI thermal zone.
+				RO
+				Optional
+
+cdev[0-*]			Sysfs link to the thermal cooling device node where the sys I/F
+				for cooling device throttling control represents.
+				RO
+				Optional
+
+cdev[0-*]_trip_point		The trip point with which cdev[0-*] is assocated in this thermal zone
+				-1 means the cooling device is not associated with any trip point.
+				RO
+				Optional
+
+******************************
+* Cooling device  attributes *
+******************************
+
+type				String which represents the type of device
+				eg: For generic ACPI: this should be "Fan",
+				"Processor" or "LCD"
+				eg. For memory controller device on intel_menlow platform:
+				this should be "Memory controller"
+				RO
+				Optional
+
+max_state			The maximum permissible cooling state of this cooling device.
+				RO
+				Required
+
+cur_state			The current cooling state of this cooling device.
+				the value can any integer numbers between 0 and max_state,
+				cur_state == 0 means no cooling
+				cur_state == max_state means the maximum cooling.
+				RW
+				Required
+
+3. A simple implementation
+
+ACPI thermal zone may support multiple trip points like critical/hot/passive/active.
+If an ACPI thermal zone supports critical, passive, active[0] and active[1] at the same time,
+it may register itself as a thermale_zone_device (thermal_zone1) with 4 trip points in all.
+It has one processor and one fan, which are both registered as thermal_cooling_device.
+If the processor is listed in _PSL method, and the fan is listed in _AL0 method,
+the sys I/F structure will be built like this:
+
+/sys/class/thermal:
+
+|thermal_zone1:
+	|-----type:			ACPI thermal zone
+	|-----temp:			37
+	|-----mode:			kernel
+	|-----trip_point_0_temp:	100
+	|-----trip_point_0_type:	critical
+	|-----trip_point_1_temp:	80
+	|-----trip_point_1_type:	passive
+	|-----trip_point_2_temp:	70
+	|-----trip_point_2_type:	active[0]
+	|-----trip_point_3_temp:	60
+	|-----trip_point_3_type:	active[1]
+	|-----cdev0:			--->/sys/class/thermal/cooling_device0
+	|-----cdev0_trip_point:		1	/* cdev0 can be used for passive */
+	|-----cdev1:			--->/sys/class/thermal/cooling_device3
+	|-----cdev1_trip_point:		2	/* cdev1 can be used for active[0]*/
+
+|cooling_device0:
+	|-----type:			Processor
+	|-----max_state:		8
+	|-----cur_state:		0
+
+|cooling_device3:
+	|-----type:			Fan
+	|-----max_state:		2
+	|-----cur_state:		0
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 08d4ae201597..8e238cfc0795 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -58,6 +58,8 @@ source "drivers/power/Kconfig"
 
 source "drivers/hwmon/Kconfig"
 
+source "drivers/thermal/Kconfig"
+
 source "drivers/watchdog/Kconfig"
 
 source "drivers/ssb/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 0ee9a8a4095e..a516b8b19127 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -64,6 +64,7 @@ obj-y				+= i2c/
 obj-$(CONFIG_W1)		+= w1/
 obj-$(CONFIG_POWER_SUPPLY)	+= power/
 obj-$(CONFIG_HWMON)		+= hwmon/
+obj-$(CONFIG_THERMAL)		+= thermal/
 obj-$(CONFIG_WATCHDOG)		+= watchdog/
 obj-$(CONFIG_PHONE)		+= telephony/
 obj-$(CONFIG_MD)		+= md/
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
new file mode 100644
index 000000000000..9b3f61200000
--- /dev/null
+++ b/drivers/thermal/Kconfig
@@ -0,0 +1,15 @@
+#
+# Generic thermal sysfs drivers configuration
+#
+
+menuconfig THERMAL
+	bool "Generic Thermal sysfs driver"
+	default y
+	help
+	  Generic Thermal Sysfs driver offers a generic mechanism for
+	  thermal management. Usually it's made up of one or more thermal
+	  zone and cooling device.
+	  each thermal zone contains its own temperature, trip points,
+	  cooling devices.
+	  All platforms with ACPI thermal support can use this driver.
+	  If you want this support, you should say Y here
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
new file mode 100644
index 000000000000..8ef1232de376
--- /dev/null
+++ b/drivers/thermal/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for sensor chip drivers.
+#
+
+obj-$(CONFIG_THERMAL)		+= thermal.o
diff --git a/drivers/thermal/thermal.c b/drivers/thermal/thermal.c
new file mode 100644
index 000000000000..3273e348fd14
--- /dev/null
+++ b/drivers/thermal/thermal.c
@@ -0,0 +1,714 @@
+/*
+ *  thermal.c - Generic Thermal Management Sysfs support.
+ *
+ *  Copyright (C) 2008 Intel Corp
+ *  Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
+ *  Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ *
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/kdev_t.h>
+#include <linux/idr.h>
+#include <linux/thermal.h>
+#include <linux/spinlock.h>
+
+MODULE_AUTHOR("Zhang Rui")
+MODULE_DESCRIPTION("Generic thermal management sysfs support");
+MODULE_LICENSE("GPL");
+
+#define PREFIX "Thermal: "
+
+struct thermal_cooling_device_instance {
+	int id;
+	char name[THERMAL_NAME_LENGTH];
+	struct thermal_zone_device *tz;
+	struct thermal_cooling_device *cdev;
+	int trip;
+	char attr_name[THERMAL_NAME_LENGTH];
+	struct device_attribute attr;
+	struct list_head node;
+};
+
+static DEFINE_IDR(thermal_tz_idr);
+static DEFINE_IDR(thermal_cdev_idr);
+static DEFINE_MUTEX(thermal_idr_lock);
+
+static LIST_HEAD(thermal_tz_list);
+static LIST_HEAD(thermal_cdev_list);
+static DEFINE_MUTEX(thermal_list_lock);
+
+static int get_idr(struct idr *idr, struct mutex *lock, int *id)
+{
+	int err;
+
+      again:
+	if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
+		return -ENOMEM;
+
+	if (lock)
+		mutex_lock(lock);
+	err = idr_get_new(idr, NULL, id);
+	if (lock)
+		mutex_unlock(lock);
+	if (unlikely(err == -EAGAIN))
+		goto again;
+	else if (unlikely(err))
+		return err;
+
+	*id = *id & MAX_ID_MASK;
+	return 0;
+}
+
+static void release_idr(struct idr *idr, struct mutex *lock, int id)
+{
+	if (lock)
+		mutex_lock(lock);
+	idr_remove(idr, id);
+	if (lock)
+		mutex_unlock(lock);
+}
+
+/* sys I/F for thermal zone */
+
+#define to_thermal_zone(_dev) \
+	container_of(_dev, struct thermal_zone_device, device)
+
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	return sprintf(buf, "%s\n", tz->type);
+}
+
+static ssize_t
+temp_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	if (!tz->ops->get_temp)
+		return -EPERM;
+
+	return tz->ops->get_temp(tz, buf);
+}
+
+static ssize_t
+mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	if (!tz->ops->get_mode)
+		return -EPERM;
+
+	return tz->ops->get_mode(tz, buf);
+}
+
+static ssize_t
+mode_store(struct device *dev, struct device_attribute *attr,
+	   const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int result;
+
+	if (!tz->ops->set_mode)
+		return -EPERM;
+
+	result = tz->ops->set_mode(tz, buf);
+	if (result)
+		return result;
+
+	return count;
+}
+
+static ssize_t
+trip_point_type_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int trip;
+
+	if (!tz->ops->get_trip_type)
+		return -EPERM;
+
+	if (!sscanf(attr->attr.name, "trip_point_%d_type", &trip))
+		return -EINVAL;
+
+	return tz->ops->get_trip_type(tz, trip, buf);
+}
+
+static ssize_t
+trip_point_temp_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int trip;
+
+	if (!tz->ops->get_trip_temp)
+		return -EPERM;
+
+	if (!sscanf(attr->attr.name, "trip_point_%d_temp", &trip))
+		return -EINVAL;
+
+	return tz->ops->get_trip_temp(tz, trip, buf);
+}
+
+static DEVICE_ATTR(type, 0444, type_show, NULL);
+static DEVICE_ATTR(temp, 0444, temp_show, NULL);
+static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
+
+static struct device_attribute trip_point_attrs[] = {
+	__ATTR(trip_point_0_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_0_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_1_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_1_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_2_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_2_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_3_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_3_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_4_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_4_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_5_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_5_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_6_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_6_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_7_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_7_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_8_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_8_temp, 0444, trip_point_temp_show, NULL),
+	__ATTR(trip_point_9_type, 0444, trip_point_type_show, NULL),
+	__ATTR(trip_point_9_temp, 0444, trip_point_temp_show, NULL),
+};
+
+#define TRIP_POINT_ATTR_ADD(_dev, _index, result)     \
+do {    \
+	result = device_create_file(_dev,	\
+				&trip_point_attrs[_index * 2]);	\
+	if (result)	\
+		break;	\
+	result = device_create_file(_dev,	\
+			&trip_point_attrs[_index * 2 + 1]);	\
+} while (0)
+
+#define TRIP_POINT_ATTR_REMOVE(_dev, _index)	\
+do {	\
+	device_remove_file(_dev, &trip_point_attrs[_index * 2]);	\
+	device_remove_file(_dev, &trip_point_attrs[_index * 2 + 1]);	\
+} while (0)
+
+/* sys I/F for cooling device */
+#define to_cooling_device(_dev)	\
+	container_of(_dev, struct thermal_cooling_device, device)
+
+static ssize_t
+thermal_cooling_device_type_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+
+	return sprintf(buf, "%s\n", cdev->type);
+}
+
+static ssize_t
+thermal_cooling_device_max_state_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+
+	return cdev->ops->get_max_state(cdev, buf);
+}
+
+static ssize_t
+thermal_cooling_device_cur_state_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+
+	return cdev->ops->get_cur_state(cdev, buf);
+}
+
+static ssize_t
+thermal_cooling_device_cur_state_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+	int state;
+	int result;
+
+	if (!sscanf(buf, "%d\n", &state))
+		return -EINVAL;
+
+	if (state < 0)
+		return -EINVAL;
+
+	result = cdev->ops->set_cur_state(cdev, state);
+	if (result)
+		return result;
+	return count;
+}
+
+static struct device_attribute dev_attr_cdev_type =
+		__ATTR(type, 0444, thermal_cooling_device_type_show, NULL);
+static DEVICE_ATTR(max_state, 0444,
+		   thermal_cooling_device_max_state_show, NULL);
+static DEVICE_ATTR(cur_state, 0644,
+		   thermal_cooling_device_cur_state_show,
+		   thermal_cooling_device_cur_state_store);
+
+static ssize_t
+thermal_cooling_device_trip_point_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct thermal_cooling_device_instance *instance;
+
+	instance =
+	    container_of(attr, struct thermal_cooling_device_instance, attr);
+
+	if (instance->trip == THERMAL_TRIPS_NONE)
+		return sprintf(buf, "-1\n");
+	else
+		return sprintf(buf, "%d\n", instance->trip);
+}
+
+/* Device management */
+
+/**
+ * thermal_zone_bind_cooling_device - bind a cooling device to a thermal zone
+ * this function is usually called in the thermal zone device .bind callback.
+ * @tz:		thermal zone device
+ * @trip:	indicates which trip point the cooling devices is
+ *		associated with in this thermal zone.
+ * @cdev:	thermal cooling device
+ */
+int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+				     int trip,
+				     struct thermal_cooling_device *cdev)
+{
+	struct thermal_cooling_device_instance *dev;
+	struct thermal_cooling_device_instance *pos;
+	int result;
+
+	if (trip >= tz->trips ||
+	    (trip < 0 && trip != THERMAL_TRIPS_NONE))
+		return -EINVAL;
+
+	if (!tz || !cdev)
+		return -EINVAL;
+
+	dev =
+	    kzalloc(sizeof(struct thermal_cooling_device_instance), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	dev->tz = tz;
+	dev->cdev = cdev;
+	dev->trip = trip;
+	result = get_idr(&tz->idr, &tz->lock, &dev->id);
+	if (result)
+		goto free_mem;
+
+	sprintf(dev->name, "cdev%d", dev->id);
+	result =
+	    sysfs_create_link(&tz->device.kobj, &cdev->device.kobj, dev->name);
+	if (result)
+		goto release_idr;
+
+	sprintf(dev->attr_name, "cdev%d_trip_point", dev->id);
+	dev->attr.attr.name = dev->attr_name;
+	dev->attr.attr.mode = 0444;
+	dev->attr.show = thermal_cooling_device_trip_point_show;
+	result = device_create_file(&tz->device, &dev->attr);
+	if (result)
+		goto remove_symbol_link;
+
+	mutex_lock(&tz->lock);
+	list_for_each_entry(pos, &tz->cooling_devices, node)
+	    if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
+		result = -EEXIST;
+		break;
+	}
+	if (!result)
+		list_add_tail(&dev->node, &tz->cooling_devices);
+	mutex_unlock(&tz->lock);
+
+	if (!result)
+		return 0;
+
+	device_remove_file(&tz->device, &dev->attr);
+      remove_symbol_link:
+	sysfs_remove_link(&tz->device.kobj, dev->name);
+      release_idr:
+	release_idr(&tz->idr, &tz->lock, dev->id);
+      free_mem:
+	kfree(dev);
+	return result;
+}
+EXPORT_SYMBOL(thermal_zone_bind_cooling_device);
+
+/**
+ * thermal_zone_unbind_cooling_device - unbind a cooling device from a thermal zone
+ * this function is usually called in the thermal zone device .unbind callback.
+ * @tz:		thermal zone device
+ * @trip:	indicates which trip point the cooling devices is
+ *		associated with in this thermal zone.
+ * @cdev:	thermal cooling device
+ */
+int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
+				       int trip,
+				       struct thermal_cooling_device *cdev)
+{
+	struct thermal_cooling_device_instance *pos, *next;
+
+	mutex_lock(&tz->lock);
+	list_for_each_entry_safe(pos, next, &tz->cooling_devices, node) {
+		if (pos->tz == tz && pos->trip == trip
+		    && pos->cdev == cdev) {
+			list_del(&pos->node);
+			mutex_unlock(&tz->lock);
+			goto unbind;
+		}
+	}
+	mutex_unlock(&tz->lock);
+
+	return -ENODEV;
+
+      unbind:
+	device_remove_file(&tz->device, &pos->attr);
+	sysfs_remove_link(&tz->device.kobj, pos->name);
+	release_idr(&tz->idr, &tz->lock, pos->id);
+	kfree(pos);
+	return 0;
+}
+EXPORT_SYMBOL(thermal_zone_unbind_cooling_device);
+
+static void thermal_release(struct device *dev)
+{
+	struct thermal_zone_device *tz;
+	struct thermal_cooling_device *cdev;
+
+	if (!strncmp(dev->bus_id, "thermal_zone", sizeof "thermal_zone" - 1)) {
+		tz = to_thermal_zone(dev);
+		kfree(tz);
+	} else {
+		cdev = to_cooling_device(dev);
+		kfree(cdev);
+	}
+}
+
+static struct class thermal_class = {
+	.name = "thermal",
+	.dev_release = thermal_release,
+};
+
+/**
+ * thermal_cooling_device_register - register a new thermal cooling device
+ * @type:	the thermal cooling device type.
+ * @devdata:	device private data.
+ * @ops:		standard thermal cooling devices callbacks.
+ */
+struct thermal_cooling_device *thermal_cooling_device_register(char *type,
+		       void *devdata, struct thermal_cooling_device_ops *ops)
+{
+	struct thermal_cooling_device *cdev;
+	struct thermal_zone_device *pos;
+	int result;
+
+	if (strlen(type) >= THERMAL_NAME_LENGTH)
+		return NULL;
+
+	if (!ops || !ops->get_max_state || !ops->get_cur_state ||
+		!ops->set_cur_state)
+		return NULL;
+
+	cdev = kzalloc(sizeof(struct thermal_cooling_device), GFP_KERNEL);
+	if (!cdev)
+		return NULL;
+
+	result = get_idr(&thermal_cdev_idr, &thermal_idr_lock, &cdev->id);
+	if (result) {
+		kfree(cdev);
+		return NULL;
+	}
+
+	strcpy(cdev->type, type);
+	cdev->ops = ops;
+	cdev->device.class = &thermal_class;
+	cdev->devdata = devdata;
+	sprintf(cdev->device.bus_id, "cooling_device%d", cdev->id);
+	result = device_register(&cdev->device);
+	if (result) {
+		release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
+		kfree(cdev);
+		return NULL;
+	}
+
+	/* sys I/F */
+	if (type) {
+		result = device_create_file(&cdev->device,
+					    &dev_attr_cdev_type);
+		if (result)
+			goto unregister;
+	}
+
+	result = device_create_file(&cdev->device, &dev_attr_max_state);
+	if (result)
+		goto unregister;
+
+	result = device_create_file(&cdev->device, &dev_attr_cur_state);
+	if (result)
+		goto unregister;
+
+	mutex_lock(&thermal_list_lock);
+	list_add(&cdev->node, &thermal_cdev_list);
+	list_for_each_entry(pos, &thermal_tz_list, node) {
+		if (!pos->ops->bind)
+			continue;
+		result = pos->ops->bind(pos, cdev);
+		if (result)
+			break;
+
+	}
+	mutex_unlock(&thermal_list_lock);
+
+	if (!result)
+		return cdev;
+
+      unregister:
+	release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
+	device_unregister(&cdev->device);
+	return NULL;
+}
+EXPORT_SYMBOL(thermal_cooling_device_register);
+
+/**
+ * thermal_cooling_device_unregister - removes the registered thermal cooling device
+ *
+ * @cdev:	the thermal cooling device to remove.
+ *
+ * thermal_cooling_device_unregister() must be called when the device is no
+ * longer needed.
+ */
+void thermal_cooling_device_unregister(struct
+				       thermal_cooling_device
+				       *cdev)
+{
+	struct thermal_zone_device *tz;
+	struct thermal_cooling_device *pos = NULL;
+
+	if (!cdev)
+		return;
+
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(pos, &thermal_cdev_list, node)
+	    if (pos == cdev)
+		break;
+	if (pos != cdev) {
+		/* thermal cooling device not found */
+		mutex_unlock(&thermal_list_lock);
+		return;
+	}
+	list_del(&cdev->node);
+	list_for_each_entry(tz, &thermal_tz_list, node) {
+		if (!tz->ops->unbind)
+			continue;
+		tz->ops->unbind(tz, cdev);
+	}
+	mutex_unlock(&thermal_list_lock);
+	if (cdev->type[0])
+		device_remove_file(&cdev->device,
+				   &dev_attr_cdev_type);
+	device_remove_file(&cdev->device, &dev_attr_max_state);
+	device_remove_file(&cdev->device, &dev_attr_cur_state);
+
+	release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
+	device_unregister(&cdev->device);
+	return;
+}
+EXPORT_SYMBOL(thermal_cooling_device_unregister);
+
+/**
+ * thermal_zone_device_register - register a new thermal zone device
+ * @type:	the thermal zone device type
+ * @trips:	the number of trip points the thermal zone support
+ * @devdata:	private device data
+ * @ops:	standard thermal zone device callbacks
+ *
+ * thermal_zone_device_unregister() must be called when the device is no
+ * longer needed.
+ */
+struct thermal_zone_device *thermal_zone_device_register(char *type,
+					int trips, void *devdata,
+					struct thermal_zone_device_ops *ops)
+{
+	struct thermal_zone_device *tz;
+	struct thermal_cooling_device *pos;
+	int result;
+	int count;
+
+	if (strlen(type) >= THERMAL_NAME_LENGTH)
+		return NULL;
+
+	if (trips > THERMAL_MAX_TRIPS || trips < 0)
+		return NULL;
+
+	if (!ops || !ops->get_temp)
+		return NULL;
+
+	tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL);
+	if (!tz)
+		return NULL;
+
+	INIT_LIST_HEAD(&tz->cooling_devices);
+	idr_init(&tz->idr);
+	mutex_init(&tz->lock);
+	result = get_idr(&thermal_tz_idr, &thermal_idr_lock, &tz->id);
+	if (result) {
+		kfree(tz);
+		return NULL;
+	}
+
+	strcpy(tz->type, type);
+	tz->ops = ops;
+	tz->device.class = &thermal_class;
+	tz->devdata = devdata;
+	tz->trips = trips;
+	sprintf(tz->device.bus_id, "thermal_zone%d", tz->id);
+	result = device_register(&tz->device);
+	if (result) {
+		release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
+		kfree(tz);
+		return NULL;
+	}
+
+	/* sys I/F */
+	if (type) {
+		result = device_create_file(&tz->device, &dev_attr_type);
+		if (result)
+			goto unregister;
+	}
+
+	result = device_create_file(&tz->device, &dev_attr_temp);
+	if (result)
+		goto unregister;
+
+	if (ops->get_mode) {
+		result = device_create_file(&tz->device, &dev_attr_mode);
+		if (result)
+			goto unregister;
+	}
+
+	for (count = 0; count < trips; count++) {
+		TRIP_POINT_ATTR_ADD(&tz->device, count, result);
+		if (result)
+			goto unregister;
+	}
+
+	mutex_lock(&thermal_list_lock);
+	list_add_tail(&tz->node, &thermal_tz_list);
+	if (ops->bind)
+		list_for_each_entry(pos, &thermal_cdev_list, node) {
+			result = ops->bind(tz, pos);
+			if (result)
+				break;
+		}
+	mutex_unlock(&thermal_list_lock);
+
+	if (!result)
+		return tz;
+
+      unregister:
+	release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
+	device_unregister(&tz->device);
+	return NULL;
+}
+EXPORT_SYMBOL(thermal_zone_device_register);
+
+/**
+ * thermal_device_unregister - removes the registered thermal zone device
+ *
+ * @tz: the thermal zone device to remove
+ */
+void thermal_zone_device_unregister(struct thermal_zone_device *tz)
+{
+	struct thermal_cooling_device *cdev;
+	struct thermal_zone_device *pos = NULL;
+	int count;
+
+	if (!tz)
+		return;
+
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(pos, &thermal_tz_list, node)
+	    if (pos == tz)
+		break;
+	if (pos != tz) {
+		/* thermal zone device not found */
+		mutex_unlock(&thermal_list_lock);
+		return;
+	}
+	list_del(&tz->node);
+	if (tz->ops->unbind)
+		list_for_each_entry(cdev, &thermal_cdev_list, node)
+		    tz->ops->unbind(tz, cdev);
+	mutex_unlock(&thermal_list_lock);
+
+	if (tz->type[0])
+		device_remove_file(&tz->device, &dev_attr_type);
+	device_remove_file(&tz->device, &dev_attr_temp);
+	if (tz->ops->get_mode)
+		device_remove_file(&tz->device, &dev_attr_mode);
+
+	for (count = 0; count < tz->trips; count++)
+		TRIP_POINT_ATTR_REMOVE(&tz->device, count);
+
+	release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
+	idr_destroy(&tz->idr);
+	mutex_destroy(&tz->lock);
+	device_unregister(&tz->device);
+	return;
+}
+EXPORT_SYMBOL(thermal_zone_device_unregister);
+
+static int __init thermal_init(void)
+{
+	int result = 0;
+
+	result = class_register(&thermal_class);
+	if (result) {
+		idr_destroy(&thermal_tz_idr);
+		idr_destroy(&thermal_cdev_idr);
+		mutex_destroy(&thermal_idr_lock);
+		mutex_destroy(&thermal_list_lock);
+	}
+	return result;
+}
+
+static void __exit thermal_exit(void)
+{
+	class_unregister(&thermal_class);
+	idr_destroy(&thermal_tz_idr);
+	idr_destroy(&thermal_cdev_idr);
+	mutex_destroy(&thermal_idr_lock);
+	mutex_destroy(&thermal_list_lock);
+}
+
+subsys_initcall(thermal_init);
+module_exit(thermal_exit);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
new file mode 100644
index 000000000000..e4b76c7afb51
--- /dev/null
+++ b/include/linux/thermal.h
@@ -0,0 +1,90 @@
+/*
+ *  thermal.h  ($Revision: 0 $)
+ *
+ *  Copyright (C) 2008  Intel Corp
+ *  Copyright (C) 2008  Zhang Rui <rui.zhang@intel.com>
+ *  Copyright (C) 2008  Sujith Thomas <sujith.thomas@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#ifndef __THERMAL_H__
+#define __THERMAL_H__
+
+#include <linux/idr.h>
+#include <linux/device.h>
+
+struct thermal_zone_device;
+struct thermal_cooling_device;
+
+struct thermal_zone_device_ops {
+	int (*bind) (struct thermal_zone_device *,
+		     struct thermal_cooling_device *);
+	int (*unbind) (struct thermal_zone_device *,
+		       struct thermal_cooling_device *);
+	int (*get_temp) (struct thermal_zone_device *, char *);
+	int (*get_mode) (struct thermal_zone_device *, char *);
+	int (*set_mode) (struct thermal_zone_device *, const char *);
+	int (*get_trip_type) (struct thermal_zone_device *, int, char *);
+	int (*get_trip_temp) (struct thermal_zone_device *, int, char *);
+};
+
+struct thermal_cooling_device_ops {
+	int (*get_max_state) (struct thermal_cooling_device *, char *);
+	int (*get_cur_state) (struct thermal_cooling_device *, char *);
+	int (*set_cur_state) (struct thermal_cooling_device *, unsigned int);
+};
+
+#define THERMAL_TRIPS_NONE -1
+#define THERMAL_MAX_TRIPS 10
+#define THERMAL_NAME_LENGTH 20
+struct thermal_cooling_device {
+	int id;
+	char type[THERMAL_NAME_LENGTH];
+	struct device device;
+	void *devdata;
+	struct thermal_cooling_device_ops *ops;
+	struct list_head node;
+};
+
+struct thermal_zone_device {
+	int id;
+	char type[THERMAL_NAME_LENGTH];
+	struct device device;
+	void *devdata;
+	int trips;
+	struct thermal_zone_device_ops *ops;
+	struct list_head cooling_devices;
+	struct idr idr;
+	struct mutex lock;	/* protect cooling devices list */
+	struct list_head node;
+};
+
+struct thermal_zone_device *thermal_zone_device_register(char *, int, void *,
+					struct thermal_zone_device_ops *);
+void thermal_zone_device_unregister(struct thermal_zone_device *);
+
+int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
+				     struct thermal_cooling_device *);
+int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
+				       struct thermal_cooling_device *);
+
+struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
+					struct thermal_cooling_device_ops *);
+void thermal_cooling_device_unregister(struct thermal_cooling_device *);
+
+#endif				/* __THERMAL_H__ */
-- 
cgit 


From 041d4bbf128f645fe53bb22309efb9db14dbf5b5 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Thu, 17 Jan 2008 15:51:19 +0800
Subject: ACPI: CELSIUS_TO_KELVIN fixup

Fix an imprecision in CELSIUS_TO_KELVIN and move these
two macroes to a proper place.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Thomas Sujith <sujith.thomas@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/thermal.c  | 3 ---
 include/linux/thermal.h | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 740036355722..aee371f9daf8 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -65,9 +65,6 @@
 #define ACPI_THERMAL_MAX_ACTIVE	10
 #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
 
-#define KELVIN_TO_CELSIUS(t)    (long)(((long)t-2732>=0) ? ((long)t-2732+5)/10 : ((long)t-2732-5)/10)
-#define CELSIUS_TO_KELVIN(t)	((t+273)*10)
-
 #define _COMPONENT		ACPI_THERMAL_COMPONENT
 ACPI_MODULE_NAME("thermal");
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e4b76c7afb51..bba7712cadc7 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -61,6 +61,10 @@ struct thermal_cooling_device {
 	struct list_head node;
 };
 
+#define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
+				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
+#define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
+
 struct thermal_zone_device {
 	int id;
 	char type[THERMAL_NAME_LENGTH];
-- 
cgit 


From 9a310d21196f38f6ad0ad146057548653e495c09 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Tue, 15 Jan 2008 17:54:43 -0600
Subject: [MTD] Factor out OF partition support from the NOR driver.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/Kconfig            |  8 ++++
 drivers/mtd/Makefile           |  1 +
 drivers/mtd/maps/physmap_of.c  | 89 +++++++++++++-----------------------------
 drivers/mtd/ofpart.c           | 74 +++++++++++++++++++++++++++++++++++
 include/linux/mtd/partitions.h |  9 ++++-
 5 files changed, 118 insertions(+), 63 deletions(-)
 create mode 100644 drivers/mtd/ofpart.c

(limited to 'include/linux')

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 661eac09f5cb..e8503341e3b1 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -150,6 +150,14 @@ config MTD_AFS_PARTS
 	  for your particular device. It won't happen automatically. The
 	  'armflash' map driver (CONFIG_MTD_ARMFLASH) does this, for example.
 
+config MTD_OF_PARTS
+	tristate "Flash partition map based on OF description"
+	depends on PPC_OF && MTD_PARTITIONS
+	help
+	  This provides a partition parsing function which derives
+	  the partition map from the children of the flash node,
+	  as described in Documentation/powerpc/booting-without-of.txt.
+
 comment "User Modules And Translation Layers"
 
 config MTD_CHAR
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 7f0b04b4caa7..538e33d11d46 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_MTD_CONCAT)	+= mtdconcat.o
 obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o
 obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o
 obj-$(CONFIG_MTD_AFS_PARTS)	+= afs.o
+obj-$(CONFIG_MTD_OF_PARTS)      += ofpart.o
 
 # 'Users' - code which presents functionality to userspace.
 obj-$(CONFIG_MTD_CHAR)		+= mtdchar.o
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index d4bcd3f8c57c..49acd4171893 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -80,65 +80,6 @@ static int parse_obsolete_partitions(struct of_device *dev,
 
 	return nr_parts;
 }
-
-static int __devinit parse_partitions(struct of_flash *info,
-				      struct of_device *dev)
-{
-	const char *partname;
-	static const char *part_probe_types[]
-		= { "cmdlinepart", "RedBoot", NULL };
-	struct device_node *dp = dev->node, *pp;
-	int nr_parts, i;
-
-	/* First look for RedBoot table or partitions on the command
-	 * line, these take precedence over device tree information */
-	nr_parts = parse_mtd_partitions(info->mtd, part_probe_types,
-					&info->parts, 0);
-	if (nr_parts > 0)
-		return nr_parts;
-
-	/* First count the subnodes */
-	nr_parts = 0;
-	for (pp = of_get_next_child(dp, NULL); pp;
-	     pp = of_get_next_child(dp, pp))
-		nr_parts++;
-
-	if (nr_parts == 0)
-		return parse_obsolete_partitions(dev, info, dp);
-
-	info->parts = kzalloc(nr_parts * sizeof(*info->parts),
-			      GFP_KERNEL);
-	if (!info->parts)
-		return -ENOMEM;
-
-	for (pp = of_get_next_child(dp, NULL), i = 0; pp;
-	     pp = of_get_next_child(dp, pp), i++) {
-		const u32 *reg;
-		int len;
-
-		reg = of_get_property(pp, "reg", &len);
-		if (!reg || (len != 2*sizeof(u32))) {
-			of_node_put(pp);
-			dev_err(&dev->dev, "Invalid 'reg' on %s\n",
-				dp->full_name);
-			kfree(info->parts);
-			info->parts = NULL;
-			return -EINVAL;
-		}
-		info->parts[i].offset = reg[0];
-		info->parts[i].size = reg[1];
-
-		partname = of_get_property(pp, "label", &len);
-		if (!partname)
-			partname = of_get_property(pp, "name", &len);
-		info->parts[i].name = (char *)partname;
-
-		if (of_get_property(pp, "read-only", &len))
-			info->parts[i].mask_flags = MTD_WRITEABLE;
-	}
-
-	return nr_parts;
-}
 #else /* MTD_PARTITIONS */
 #define	OF_FLASH_PARTS(info)		(0)
 #define parse_partitions(info, dev)	(0)
@@ -213,6 +154,10 @@ static struct mtd_info * __devinit obsolete_probe(struct of_device *dev,
 static int __devinit of_flash_probe(struct of_device *dev,
 				    const struct of_device_id *match)
 {
+#ifdef CONFIG_MTD_PARTITIONS
+	static const char *part_probe_types[]
+		= { "cmdlinepart", "RedBoot", NULL };
+#endif
 	struct device_node *dp = dev->node;
 	struct resource res;
 	struct of_flash *info;
@@ -275,13 +220,33 @@ static int __devinit of_flash_probe(struct of_device *dev,
 	}
 	info->mtd->owner = THIS_MODULE;
 
-	err = parse_partitions(info, dev);
+#ifdef CONFIG_MTD_PARTITIONS
+	/* First look for RedBoot table or partitions on the command
+	 * line, these take precedence over device tree information */
+	err = parse_mtd_partitions(info->mtd, part_probe_types,
+	                           &info->parts, 0);
 	if (err < 0)
-		goto err_out;
+		return err;
+
+#ifdef CONFIG_MTD_OF_PARTS
+	if (err == 0) {
+		err = of_mtd_parse_partitions(&dev->dev, info->mtd,
+		                              dp, &info->parts);
+		if (err < 0)
+			return err;
+	}
+#endif
+
+	if (err == 0) {
+		err = parse_obsolete_partitions(dev, info, dp);
+		if (err < 0)
+			return err;
+	}
 
 	if (err > 0)
-		add_mtd_partitions(info->mtd, OF_FLASH_PARTS(info), err);
+		add_mtd_partitions(info->mtd, info->parts, err);
 	else
+#endif
 		add_mtd_device(info->mtd);
 
 	return 0;
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
new file mode 100644
index 000000000000..f86e06934cd8
--- /dev/null
+++ b/drivers/mtd/ofpart.c
@@ -0,0 +1,74 @@
+/*
+ * Flash partitions described by the OF (or flattened) device tree
+ *
+ * Copyright (C) 2006 MontaVista Software Inc.
+ * Author: Vitaly Wool <vwool@ru.mvista.com>
+ *
+ * Revised to handle newer style flash binding by:
+ *   Copyright (C) 2007 David Gibson, IBM Corporation.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+
+int __devinit of_mtd_parse_partitions(struct device *dev,
+                                      struct mtd_info *mtd,
+                                      struct device_node *node,
+                                      struct mtd_partition **pparts)
+{
+	const char *partname;
+	struct device_node *pp;
+	int nr_parts, i;
+
+	/* First count the subnodes */
+	pp = NULL;
+	nr_parts = 0;
+	while ((pp = of_get_next_child(node, pp)))
+		nr_parts++;
+
+	if (nr_parts == 0)
+		return 0;
+
+	*pparts = kzalloc(nr_parts * sizeof(**pparts), GFP_KERNEL);
+	if (!*pparts)
+		return -ENOMEM;
+
+	pp = NULL;
+	i = 0;
+	while ((pp = of_get_next_child(node, pp))) {
+		const u32 *reg;
+		int len;
+
+		reg = of_get_property(pp, "reg", &len);
+		if (!reg || (len != 2 * sizeof(u32))) {
+			of_node_put(pp);
+			dev_err(dev, "Invalid 'reg' on %s\n", node->full_name);
+			kfree(*pparts);
+			*pparts = NULL;
+			return -EINVAL;
+		}
+		(*pparts)[i].offset = reg[0];
+		(*pparts)[i].size = reg[1];
+
+		partname = of_get_property(pp, "label", &len);
+		if (!partname)
+			partname = of_get_property(pp, "name", &len);
+		(*pparts)[i].name = (char *)partname;
+
+		if (of_get_property(pp, "read-only", &len))
+			(*pparts)[i].mask_flags = MTD_WRITEABLE;
+
+		i++;
+	}
+
+	return nr_parts;
+}
+EXPORT_SYMBOL(of_mtd_parse_partitions);
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index da6b3d6f12a7..7c37d7e55abc 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -71,5 +71,12 @@ extern int parse_mtd_partitions(struct mtd_info *master, const char **types,
 
 #define put_partition_parser(p) do { module_put((p)->owner); } while(0)
 
-#endif
+struct device;
+struct device_node;
+
+int __devinit of_mtd_parse_partitions(struct device *dev,
+                                      struct mtd_info *mtd,
+                                      struct device_node *node,
+                                      struct mtd_partition **pparts);
 
+#endif
-- 
cgit 


From e6298c6d60838495978cdbe5555dc290785bb961 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 25 Jan 2008 15:40:02 -0500
Subject: DMI: remove duplicate helper routine

Use existing dmi_get_system_info(),
Delete duplicate dmi_get_slot()

Spotted-by: Wim Van Sebroeck <wim@iguana.be>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c          | 12 ++++++------
 drivers/firmware/dmi_scan.c |  9 ---------
 include/linux/dmi.h         |  2 --
 3 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index e53fb516f9d4..347cda21c0ed 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1220,17 +1220,17 @@ int acpi_dmi_dump(void)
 		return -1;
 
 	printk(KERN_NOTICE PREFIX "DMI System Vendor: %s\n",
-		dmi_get_slot(DMI_SYS_VENDOR));
+		dmi_get_system_info(DMI_SYS_VENDOR));
 	printk(KERN_NOTICE PREFIX "DMI Product Name: %s\n",
-		dmi_get_slot(DMI_PRODUCT_NAME));
+		dmi_get_system_info(DMI_PRODUCT_NAME));
 	printk(KERN_NOTICE PREFIX "DMI Product Version: %s\n",
-		dmi_get_slot(DMI_PRODUCT_VERSION));
+		dmi_get_system_info(DMI_PRODUCT_VERSION));
 	printk(KERN_NOTICE PREFIX "DMI Board Name: %s\n",
-		dmi_get_slot(DMI_BOARD_NAME));
+		dmi_get_system_info(DMI_BOARD_NAME));
 	printk(KERN_NOTICE PREFIX "DMI BIOS Vendor: %s\n",
-		dmi_get_slot(DMI_BIOS_VENDOR));
+		dmi_get_system_info(DMI_BIOS_VENDOR));
 	printk(KERN_NOTICE PREFIX "DMI BIOS Date: %s\n",
-		dmi_get_slot(DMI_BIOS_DATE));
+		dmi_get_system_info(DMI_BIOS_DATE));
 
 	return 0;
 }
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 9008ed5ef4ce..e0bade732376 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -489,12 +489,3 @@ int dmi_get_year(int field)
 
 	return year;
 }
-
-/**
- *	dmi_get_slot - return dmi_ident[slot]
- *	@slot:	index into dmi_ident[]
- */
-char *dmi_get_slot(int slot)
-{
-	return(dmi_ident[slot]);
-}
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 5b42a659a308..b1251b2af568 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -79,7 +79,6 @@ extern void dmi_scan_machine(void);
 extern int dmi_get_year(int field);
 extern int dmi_name_in_vendors(const char *str);
 extern int dmi_available;
-extern char *dmi_get_slot(int slot);
 
 #else
 
@@ -90,7 +89,6 @@ static inline const struct dmi_device * dmi_find_device(int type, const char *na
 static inline int dmi_get_year(int year) { return 0; }
 static inline int dmi_name_in_vendors(const char *s) { return 0; }
 #define dmi_available 0
-static inline char *dmi_get_slot(int slot) { return NULL; }
 
 #endif
 
-- 
cgit 


From 60c778b25972e095df8981dd41e99d161e8738f9 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 11 Jan 2008 09:57:09 -0500
Subject: [SCTP]: Stop claiming that this is a "reference implementation"

I was notified by Randy Stewart that lksctp claims to be
"the reference implementation".  First of all, "the
refrence implementation" was the original implementation
of SCTP in usersapce written ty Randy and a few others.
Second, after looking at the definiton of 'reference implementation',
we don't really meet the requirements.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/linux/sctp.h         |  4 ++--
 include/net/sctp/auth.h      |  8 ++++----
 include/net/sctp/command.h   |  8 ++++----
 include/net/sctp/constants.h |  8 ++++----
 include/net/sctp/sctp.h      |  8 ++++----
 include/net/sctp/sm.h        |  8 ++++----
 include/net/sctp/structs.h   |  8 ++++----
 include/net/sctp/tsnmap.h    |  8 ++++----
 include/net/sctp/ulpevent.h  |  8 ++++----
 include/net/sctp/ulpqueue.h  |  6 +++---
 include/net/sctp/user.h      |  8 ++++----
 net/sctp/associola.c         |  8 ++++----
 net/sctp/auth.c              |  8 ++++----
 net/sctp/bind_addr.c         |  8 ++++----
 net/sctp/chunk.c             |  8 ++++----
 net/sctp/command.c           |  8 ++++----
 net/sctp/debug.c             | 12 ++++--------
 net/sctp/endpointola.c       | 12 ++++--------
 net/sctp/input.c             |  8 ++++----
 net/sctp/inqueue.c           |  8 ++++----
 net/sctp/ipv6.c              |  8 ++++----
 net/sctp/objcnt.c            |  8 ++++----
 net/sctp/output.c            |  8 ++++----
 net/sctp/outqueue.c          |  8 ++++----
 net/sctp/primitive.c         |  8 ++++----
 net/sctp/proc.c              |  8 ++++----
 net/sctp/protocol.c          |  8 ++++----
 net/sctp/sm_make_chunk.c     |  8 ++++----
 net/sctp/sm_sideeffect.c     |  8 ++++----
 net/sctp/sm_statefuns.c      | 10 ++++------
 net/sctp/sm_statetable.c     |  8 ++++----
 net/sctp/socket.c            |  8 ++++----
 net/sctp/ssnmap.c            |  8 ++++----
 net/sctp/sysctl.c            |  8 ++++----
 net/sctp/transport.c         |  8 ++++----
 net/sctp/tsnmap.c            |  8 ++++----
 net/sctp/ulpevent.c          |  7 ++++---
 net/sctp/ulpqueue.c          |  6 +++---
 38 files changed, 148 insertions(+), 157 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 5eb38cc0e5a4..8ba1c320f975 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -10,13 +10,13 @@
  *
  * Various protocol defined structures.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index 5db261a1e85e..49bc9577c61e 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -1,15 +1,15 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright 2007 Hewlett-Packard Development Company, L.P.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index c1f797673571..10ae2da6f93b 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -1,18 +1,18 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel Implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (C) 1999-2001 Cisco, Motorola
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These are the definitions needed for the command object.
  *
- * The SCTP reference implementation  is free software;
+ * This SCTP implementation  is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * the SCTP reference implementation  is distributed in the hope that it
+ * This SCTP implementation  is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index fefcba67bd1e..c32ddf0279c8 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -1,18 +1,18 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 4977b0a81535..57df27f19588 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -1,20 +1,20 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001-2003 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * The base lksctp header.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index bf2f5ed69c15..ef9e7ed2c82e 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -1,20 +1,20 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These are definitions needed by the state machine.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 4d591bfce452..9c827a749b6f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1,18 +1,18 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *		   ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h
index 70a824df6f60..099211bf998d 100644
--- a/include/net/sctp/tsnmap.h
+++ b/include/net/sctp/tsnmap.h
@@ -1,21 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These are the definitions needed for the tsnmap type.  The tsnmap is used
  * to track out of order TSNs received.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 922a151eb93c..9bcfc12275e8 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -10,15 +10,15 @@
  * sctp_ulpevent type is used to carry information from the state machine
  * upwards to the ULP.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index cd33270e86dd..2e5ee0d8458d 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -11,13 +11,13 @@
  * and the core SCTP state machine.  This is the component which handles
  * reassembly and ordering.
  *
- * The SCTP reference implementation  is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * the SCTP reference implementation  is distributed in the hope that it
+ * This SCTP implementation  is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 954090b1e354..9462d6ae2f37 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -1,21 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2002 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * This header represents the structures and constants needed to support
  * the SCTP Extension to the Sockets API.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a016e78061f4..13931a91f667 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1,21 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  * Copyright (c) 2001 La Monte H.P. Yarroll
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * This module provides the abstraction for an SCTP association.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index ae367c82e512..8bb79f281774 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -1,15 +1,15 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright 2007 Hewlett-Packard Development Company, L.P.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 13fbfb449a55..a27511ebc4cb 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -1,20 +1,20 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2003
  * Copyright (c) Cisco 1999,2000
  * Copyright (c) Motorola 1999,2000,2001
  * Copyright (c) La Monte H.P. Yarroll 2001
  *
- * This file is part of the SCTP kernel reference implementation.
+ * This file is part of the SCTP kernel implementation.
  *
  * A collection class to handle the storage of transport addresses.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 619d0f2dee51..4d3128f5ccc3 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -1,17 +1,17 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2003, 2004
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * This file contains the code relating the chunk abstraction.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/command.c b/net/sctp/command.c
index 3ff804757f4a..bb977330002a 100644
--- a/net/sctp/command.c
+++ b/net/sctp/command.c
@@ -1,18 +1,18 @@
-/* SCTP kernel reference Implementation Copyright (C) 1999-2001
+/* SCTP kernel implementation Copyright (C) 1999-2001
  * Cisco, Motorola, and IBM
  * Copyright 2001 La Monte H.P. Yarroll
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions manipulate sctp command sequences.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 80f70aa53386..67715f4eb849 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -1,25 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
- *
- * This file is part of the implementation of the add-IP extension,
- * based on <draft-ietf-tsvwg-addip-sctp-02.txt> June 29, 2001,
- * for the SCTP kernel reference Implementation.
+ * This file is part of the SCTP kernel implementation
  *
  * This file converts numerical ID value to alphabetical names for SCTP
  * terms such as chunk type, parameter time, event type, etc.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index de6f505d6ff8..e39a0cdef184 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001-2002 International Business Machines, Corp.
@@ -6,21 +6,17 @@
  * Copyright (c) 2001 Nokia, Inc.
  * Copyright (c) 2001 La Monte H.P. Yarroll
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * This abstraction represents an SCTP endpoint.
  *
- * This file is part of the implementation of the add-IP extension,
- * based on <draft-ietf-tsvwg-addip-sctp-02.txt> June 29, 2001,
- * for the SCTP kernel reference Implementation.
- *
- * The SCTP reference implementation is free software;
+ * The SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * The SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/input.c b/net/sctp/input.c
index d695f710fc77..57fe2f81eca8 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001-2003 International Business Machines, Corp.
@@ -6,17 +6,17 @@
  * Copyright (c) 2001 Nokia, Inc.
  * Copyright (c) 2001 La Monte H.P. Yarroll
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions handle all input from the IP layer into SCTP.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cf4b7eb023b3..bbf5dd2a97c4 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -1,9 +1,9 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2002 International Business Machines, Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions are the methods for accessing the SCTP inqueue.
  *
@@ -11,13 +11,13 @@
  * (which might be bundles or fragments of chunks) and out of which you
  * pop SCTP whole chunks.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 74f106a7a7e9..4d7ec961ae1d 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1,20 +1,20 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2002, 2004
  * Copyright (c) 2001 Nokia, Inc.
  * Copyright (c) 2001 La Monte H.P. Yarroll
  * Copyright (c) 2002-2003 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * SCTP over IPv6.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *		   ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 2cf6ad6ff8ce..23f53dd23191 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -1,19 +1,19 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * Support for memory object debugging.  This allows one to monitor the
  * object allocations/deallocations for types instrumented for this
  * via the proc fs.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 5e811b91f21c..aa700feea76c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -1,19 +1,19 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions handle output processing.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a42af865c2ef..3c2a281347e1 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1,21 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001-2003 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions implement the sctp_outq class.   The outqueue handles
  * bundling and queueing of outgoing SCTP chunks.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 1b2976d34ac7..8cb4f060bce6 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -1,8 +1,8 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions implement the SCTP primitive functions from Section 10.
  *
@@ -10,13 +10,13 @@
  * functions--this file is the functions which populate the struct proto
  * for SCTP which is the BOTTOM of the sockets interface.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 249973204070..330362e4ea0d 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -1,15 +1,15 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * Copyright (c) 2003 International Business Machines, Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1339742e49f1..22a16571499c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -6,17 +6,17 @@
  * Copyright (c) 2001 Nokia, Inc.
  * Copyright (c) 2001 La Monte H.P. Yarroll
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * Initialization/cleanup for SCTP protocol support.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 77383e9b3988..80b3c4f09e6c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1,22 +1,22 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001-2002 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions work with the state functions in sctp_sm_statefuns.c
  * to implement the state operations.  These functions implement the
  * steps which require modifying existing data structures.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 78d1a8a49bd0..28eb38eb6083 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1,21 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions work with the state functions in sctp_sm_statefuns.c
  * to implement that state operations.  These functions implement the
  * steps which require modifying existing data structures.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f98658782d4f..f2ed6473feef 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1,23 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001-2002 Intel Corp.
  * Copyright (c) 2002      Nokia Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
- *
- * This is part of the SCTP Linux Kernel Reference Implementation.
+ * This is part of the SCTP Linux Kernel Implementation.
  *
  * These are the state functions for the state machine.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index e6016e41ffa0..d991237fb400 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -1,21 +1,21 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  * Copyright (c) 2001 Nokia, Inc.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These are the state tables for the SCTP state machine.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 710df67a6785..401dac618406 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -6,7 +6,7 @@
  * Copyright (c) 2001-2002 Nokia, Inc.
  * Copyright (c) 2001 La Monte H.P. Yarroll
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions interface with the sockets layer to implement the
  * SCTP Extensions for the Sockets API.
@@ -15,13 +15,13 @@
  * functions--this file is the functions which populate the struct proto
  * for SCTP which is the BOTTOM of the sockets interface.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index cbe2513d2822..737d330e5ffc 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -1,17 +1,17 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * Copyright (c) 2003 International Business Machines, Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions manipulate sctp SSN tracker.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 5eb6ea829b54..52910697e104 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -1,18 +1,18 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2002, 2004
  * Copyright (c) 2002 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * Sysctl related interfaces for SCTP.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index dfa109341aeb..d9f8af852b56 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -1,23 +1,23 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001-2003 International Business Machines Corp.
  * Copyright (c) 2001 Intel Corp.
  * Copyright (c) 2001 La Monte H.P. Yarroll
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * This module provides the abstraction for an SCTP tranport representing
  * a remote transport address.  For local transport addresses, we just use
  * union sctp_addr.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 1ff0daade304..f3e58b275905 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -1,20 +1,20 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
  * Copyright (c) 2001 Intel Corp.
  *
- * This file is part of the SCTP kernel reference Implementation
+ * This file is part of the SCTP kernel implementation
  *
  * These functions manipulate sctp tsn mapping array.
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 047c27df98f4..e27b11f18b7f 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -8,13 +8,14 @@
  *
  * These functions manipulate an sctp event.   The struct ulpevent is used
  * to carry notifications and data to the ULP (sockets).
- * The SCTP reference implementation is free software;
+ *
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index c25caefa3bcb..18bf83776e08 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1,4 +1,4 @@
-/* SCTP kernel reference Implementation
+/* SCTP kernel implementation
  * (C) Copyright IBM Corp. 2001, 2004
  * Copyright (c) 1999-2000 Cisco, Inc.
  * Copyright (c) 1999-2001 Motorola, Inc.
@@ -8,13 +8,13 @@
  *
  * This abstraction carries sctp events to the ULP (sockets).
  *
- * The SCTP reference implementation is free software;
+ * This SCTP implementation is free software;
  * you can redistribute it and/or modify it under the terms of
  * the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
- * The SCTP reference implementation is distributed in the hope that it
+ * This SCTP implementation is distributed in the hope that it
  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  *                 ************************
  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-- 
cgit 


From 40d25142f2ef27084fc317ac8bb5bae460c8ea72 Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khc@pm.waw.pl>
Date: Fri, 1 Feb 2008 22:37:12 +0100
Subject: Generic HDLC - remove now unneeded hdlc_device_desc

Removes now unneeded struct hdlc_device_desc

Signed-off-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/wan/hdlc.c         | 24 +++++++++---------------
 drivers/net/wan/hdlc_cisco.c   |  5 +++--
 drivers/net/wan/hdlc_fr.c      |  7 ++++---
 drivers/net/wan/hdlc_ppp.c     |  2 +-
 drivers/net/wan/hdlc_raw.c     |  2 +-
 drivers/net/wan/hdlc_raw_eth.c |  2 +-
 drivers/net/wan/hdlc_x25.c     | 10 +++++-----
 include/linux/hdlc.h           | 25 +++++++------------------
 8 files changed, 31 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index d553e6f32851..39951d0c34d6 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -1,7 +1,7 @@
 /*
  * Generic HDLC support routines for Linux
  *
- * Copyright (C) 1999 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 1999 - 2008 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -39,7 +39,7 @@
 #include <net/net_namespace.h>
 
 
-static const char* version = "HDLC support module revision 1.21";
+static const char* version = "HDLC support module revision 1.22";
 
 #undef DEBUG_LINK
 
@@ -66,19 +66,15 @@ static struct net_device_stats *hdlc_get_stats(struct net_device *dev)
 static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev,
 		    struct packet_type *p, struct net_device *orig_dev)
 {
-	struct hdlc_device_desc *desc = dev_to_desc(dev);
+	struct hdlc_device *hdlc = dev_to_hdlc(dev);
 
 	if (dev->nd_net != &init_net) {
 		kfree_skb(skb);
 		return 0;
 	}
 
-	if (desc->netif_rx)
-		return desc->netif_rx(skb);
-
-	desc->stats.rx_dropped++; /* Shouldn't happen */
-	dev_kfree_skb(skb);
-	return NET_RX_DROP;
+	BUG_ON(!hdlc->proto->netif_rx);
+	return hdlc->proto->netif_rx(skb);
 }
 
 
@@ -87,7 +83,7 @@ static inline void hdlc_proto_start(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	if (hdlc->proto->start)
-		return hdlc->proto->start(dev);
+		hdlc->proto->start(dev);
 }
 
 
@@ -96,7 +92,7 @@ static inline void hdlc_proto_stop(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	if (hdlc->proto->stop)
-		return hdlc->proto->stop(dev);
+		hdlc->proto->stop(dev);
 }
 
 
@@ -263,8 +259,7 @@ static void hdlc_setup(struct net_device *dev)
 struct net_device *alloc_hdlcdev(void *priv)
 {
 	struct net_device *dev;
-	dev = alloc_netdev(sizeof(struct hdlc_device_desc) +
-			   sizeof(hdlc_device), "hdlc%d", hdlc_setup);
+	dev = alloc_netdev(sizeof(struct hdlc_device), "hdlc%d", hdlc_setup);
 	if (dev)
 		dev_to_hdlc(dev)->priv = priv;
 	return dev;
@@ -281,7 +276,7 @@ void unregister_hdlc_device(struct net_device *dev)
 
 
 int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
-			 int (*rx)(struct sk_buff *skb), size_t size)
+			 size_t size)
 {
 	detach_hdlc_protocol(dev);
 
@@ -297,7 +292,6 @@ int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 			return -ENOBUFS;
 		}
 	dev_to_hdlc(dev)->proto = proto;
-	dev_to_desc(dev)->netif_rx = rx;
 	return 0;
 }
 
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 038a6e748bbf..7133c688cf20 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -250,7 +250,7 @@ static int cisco_rx(struct sk_buff *skb)
 	return NET_RX_DROP;
 
  rx_error:
-	dev_to_desc(dev)->stats.rx_errors++; /* Mark error */
+	dev_to_hdlc(dev)->stats.rx_errors++; /* Mark error */
 	dev_kfree_skb_any(skb);
 	return NET_RX_DROP;
 }
@@ -314,6 +314,7 @@ static struct hdlc_proto proto = {
 	.stop		= cisco_stop,
 	.type_trans	= cisco_type_trans,
 	.ioctl		= cisco_ioctl,
+	.netif_rx	= cisco_rx,
 	.module		= THIS_MODULE,
 };
 
@@ -360,7 +361,7 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		result = attach_hdlc_protocol(dev, &proto, cisco_rx,
+		result = attach_hdlc_protocol(dev, &proto,
 					      sizeof(struct cisco_state));
 		if (result)
 			return result;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 51296c2b8b89..2bd609c27068 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -956,7 +956,7 @@ static int fr_rx(struct sk_buff *skb)
 
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-		dev_to_desc(frad)->stats.rx_dropped++;
+		dev_to_hdlc(frad)->stats.rx_dropped++;
 		return NET_RX_DROP;
 	}
 
@@ -1017,7 +1017,7 @@ static int fr_rx(struct sk_buff *skb)
 	}
 
  rx_error:
-	dev_to_desc(frad)->stats.rx_errors++; /* Mark error */
+	dev_to_hdlc(frad)->stats.rx_errors++; /* Mark error */
 	dev_kfree_skb_any(skb);
 	return NET_RX_DROP;
 }
@@ -1217,6 +1217,7 @@ static struct hdlc_proto proto = {
 	.stop		= fr_stop,
 	.detach		= fr_destroy,
 	.ioctl		= fr_ioctl,
+	.netif_rx	= fr_rx,
 	.module		= THIS_MODULE,
 };
 
@@ -1275,7 +1276,7 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 			return result;
 
 		if (dev_to_hdlc(dev)->proto != &proto) { /* Different proto */
-			result = attach_hdlc_protocol(dev, &proto, fr_rx,
+			result = attach_hdlc_protocol(dev, &proto,
 						      sizeof(struct frad_state));
 			if (result)
 				return result;
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 519e1550e2e7..10396d9686f4 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -122,7 +122,7 @@ static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		result = attach_hdlc_protocol(dev, &proto, NULL,
+		result = attach_hdlc_protocol(dev, &proto,
 					      sizeof(struct ppp_state));
 		if (result)
 			return result;
diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c
index e23bc6656267..bbbb819d764c 100644
--- a/drivers/net/wan/hdlc_raw.c
+++ b/drivers/net/wan/hdlc_raw.c
@@ -82,7 +82,7 @@ static int raw_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		result = attach_hdlc_protocol(dev, &proto, NULL,
+		result = attach_hdlc_protocol(dev, &proto,
 					      sizeof(raw_hdlc_proto));
 		if (result)
 			return result;
diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c
index 8895394e6006..11b16bdfe6aa 100644
--- a/drivers/net/wan/hdlc_raw_eth.c
+++ b/drivers/net/wan/hdlc_raw_eth.c
@@ -96,7 +96,7 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		result = attach_hdlc_protocol(dev, &proto, NULL,
+		result = attach_hdlc_protocol(dev, &proto,
 					      sizeof(raw_hdlc_proto));
 		if (result)
 			return result;
diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index cd7b22f50edc..c15cc11e399b 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -164,17 +164,17 @@ static void x25_close(struct net_device *dev)
 
 static int x25_rx(struct sk_buff *skb)
 {
-	struct hdlc_device_desc *desc = dev_to_desc(skb->dev);
+	struct hdlc_device *hdlc = dev_to_hdlc(skb->dev);
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-		desc->stats.rx_dropped++;
+		hdlc->stats.rx_dropped++;
 		return NET_RX_DROP;
 	}
 
 	if (lapb_data_received(skb->dev, skb) == LAPB_OK)
 		return NET_RX_SUCCESS;
 
-	desc->stats.rx_errors++;
+	hdlc->stats.rx_errors++;
 	dev_kfree_skb_any(skb);
 	return NET_RX_DROP;
 }
@@ -184,6 +184,7 @@ static struct hdlc_proto proto = {
 	.open		= x25_open,
 	.close		= x25_close,
 	.ioctl		= x25_ioctl,
+	.netif_rx	= x25_rx,
 	.module		= THIS_MODULE,
 };
 
@@ -211,8 +212,7 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		if ((result = attach_hdlc_protocol(dev, &proto,
-						   x25_rx, 0)) != 0)
+		if ((result = attach_hdlc_protocol(dev, &proto, 0)))
 			return result;
 		dev->hard_start_xmit = x25_xmit;
 		dev->type = ARPHRD_X25;
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index db390c511ada..6115545a5b9c 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -26,13 +26,6 @@
 #include <linux/netdevice.h>
 #include <linux/hdlc/ioctl.h>
 
-
-/* Used by all network devices here, pointed to by netdev_priv(dev) */
-struct hdlc_device_desc {
-	int (*netif_rx)(struct sk_buff *skb);
-	struct net_device_stats stats;
-};
-
 /* This structure is a private property of HDLC protocols.
    Hardware drivers have no interest here */
 
@@ -44,12 +37,15 @@ struct hdlc_proto {
 	void (*detach)(struct net_device *dev);
 	int (*ioctl)(struct net_device *dev, struct ifreq *ifr);
 	__be16 (*type_trans)(struct sk_buff *skb, struct net_device *dev);
+	int (*netif_rx)(struct sk_buff *skb);
 	struct module *module;
 	struct hdlc_proto *next; /* next protocol in the list */
 };
 
 
+/* Pointed to by dev->priv */
 typedef struct hdlc_device {
+	struct net_device_stats stats;
 	/* used by HDLC layer to take control over HDLC device from hw driver*/
 	int (*attach)(struct net_device *dev,
 		      unsigned short encoding, unsigned short parity);
@@ -83,18 +79,11 @@ void unregister_hdlc_protocol(struct hdlc_proto *proto);
 
 struct net_device *alloc_hdlcdev(void *priv);
 
-
-static __inline__ struct hdlc_device_desc* dev_to_desc(struct net_device *dev)
-{
-	return netdev_priv(dev);
-}
-
-static __inline__ hdlc_device* dev_to_hdlc(struct net_device *dev)
+static inline struct hdlc_device* dev_to_hdlc(struct net_device *dev)
 {
-	return netdev_priv(dev) + sizeof(struct hdlc_device_desc);
+	return dev->priv;
 }
 
-
 static __inline__ void debug_frame(const struct sk_buff *skb)
 {
 	int i;
@@ -116,13 +105,13 @@ int hdlc_open(struct net_device *dev);
 void hdlc_close(struct net_device *dev);
 
 int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
-			 int (*rx)(struct sk_buff *skb), size_t size);
+			 size_t size);
 /* May be used by hardware driver to gain control over HDLC device */
 void detach_hdlc_protocol(struct net_device *dev);
 
 static __inline__ struct net_device_stats *hdlc_stats(struct net_device *dev)
 {
-	return &dev_to_desc(dev)->stats;
+	return &dev_to_hdlc(dev)->stats;
 }
 
 
-- 
cgit 


From 532031d7f426eb02f854d13184416cabcb01bdd5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 4 Feb 2008 23:58:42 -0800
Subject: b43: fix build with CONFIG_SSB_PCIHOST=n

m68k allmodconfig gives

drivers/net/wireless/b43/main.c:251: error: implicit declaration of function 'mmiowb'

because CONFIG_B43=m, CONFIG_SSB_PCIHOST=n.

Might be Kconfig bustage, but this works...

Cc: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ssb/ssb.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index e18f5c23b930..9d5da8b2ccf9 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -373,6 +373,15 @@ void ssb_pcihost_set_power_state(struct ssb_device *sdev, pci_power_t state)
 	if (sdev->bus->bustype == SSB_BUSTYPE_PCI)
 		pci_set_power_state(sdev->bus->host_pci, state);
 }
+#else
+static inline void ssb_pcihost_unregister(struct pci_driver *driver)
+{
+}
+
+static inline
+void ssb_pcihost_set_power_state(struct ssb_device *sdev, pci_power_t state)
+{
+}
 #endif /* CONFIG_SSB_PCIHOST */
 
 
-- 
cgit 


From bff431e49ff531a343fbb2b4426e313000844f32 Mon Sep 17 00:00:00 2001
From: Carlos Corbacho <carlos@strangeworlds.co.uk>
Date: Tue, 5 Feb 2008 02:17:04 +0000
Subject: ACPI: WMI: Add ACPI-WMI mapping driver

The following is an implementation of the Windows Management
Instrumentation (WMI) ACPI interface mapper (PNP0C14).

What it does:

Parses the _WDG method and exports functions to process WMI method calls,
data block query/ set commands (both based on GUID) and does basic event
handling.

How: WMI presents an in kernel interface here (essentially, a minimal
wrapper around ACPI)

(const char *guid assume the 36 character ASCII representation of
a GUID - e.g. 67C3371D-95A3-4C37-BB61-DD47B491DAAB)

wmi_evaluate_method(const char *guid, u8 instance, u32 method_id,
const struct acpi_buffer *in, struct acpi_buffer *out)

wmi_query_block(const char *guid, u8 instance,
struct acpi_buffer *out)

wmi_set_block(const char *guid, u38 instance,
const struct acpi_buffer *in)

wmi_install_notify_handler(acpi_notify_handler handler);

wmi_remove_notify_handler(void);

wmi_get_event_data(u32 event, struct acpi_buffer *out)

wmi_has_guid(const char guid*)

wmi_has_guid() is a helper function to find if a GUID exists or not on the
system (a quick and easy way for WMI dependant drivers to see if the
the method/ block they want exists, since GUIDs are supposed to be unique).

Event handling - allow a WMI based driver to register a notifier handler
for each GUID with WMI. When a notification is sent to a GUID in WMI, the
handler registered with WMI is then called (it is left to the caller to
ask for the WMI event data associated with the GUID, if needed).

What it won't do:

Unicode - The MS article[1] calls for converting between ASCII and Unicode (or
vice versa) if a GUID is marked as "string". This is left up to the calling
driver.

Handle a MOF[1] - the WMI mapper just exports methods, data and events to
userspace. MOF handling is down to userspace.

Userspace interface - this will be added later.

[1] http://www.microsoft.com/whdc/system/pnppwr/wmi/wmi-acpi.mspx

===
ChangeLog
==

v1 (2007-10-02):

* Initial release

v2 (2007-10-05):

* Cleaned up code - split up super "wmi_evaluate_block" -> each external
  symbol now handles its own ACPI calls, rather than handing off to
  a "super" method (and in turn, is a lot simpler to read)
* Added a find_guid() symbol - return true if a given GUID exists on
  the system
* wmi_* functions now return type acpi_status (since they are just
  fancy wrappers around acpi_evaluate_object())
* Removed extra debug code

v3 (2007-10-27)

* More code clean up - now passes checkpatch.pl
* Change data block calls - ref MS spec, method ID is not required for
  them, so drop it from the function parameters.
* Const'ify guid in the function call parameters.
* Fix _WDG buffer handling - copy the data to our own private structure.
* Change WMI from tristate to bool - otherwise the external functions are
  not exported in linux/acpi.h if you try to build WMI as a module.
* Fix more flag comparisons.
* Add a maintainers entry - since I wrote this, I should take the blame
  for it.

v4 (2007-10-30)

* Add missing brace from after fixing checkpatch errors.
* Rewrote event handling - allow external drivers to register with WMI to
  handle WMI events
* Clean up flags and sanitise flag handling

v5 (2007-11-03)

* Add sysfs interface for userspace. Export events over netlink again.
* Remove module left overs, fully convert to built-in driver.
* Tweak in-kernel API to use u8 for instance, since this is what the GUID
  blocks use (so instance cannot be greater than u8).
* Export wmi_get_event_data() for in kernel WMI drivers.

v6 (2007-11-07)

* Split out userspace into a different patch

v7 (2007-11-20)

* Fix driver to handle multiple PNP0C14 devices - store all GUIDs using
  the kernel's built in list functions, and just keep adding to the list
  every time we handle a PNP0C14 devices - GUIDs will always be unique,
  and WMI callers do not know or care about different devices.
* Change WMI event handler registration to use its' own event handling
  struct; we should not pass an acpi_handle down to any WMI based drivers
  - they should be able to function with only the calls provided in WMI.
* Update my e-mail address

v8 (2007-11-28)

* Convert back to a module.
* Update Kconfig to default to building as a module.
* Remove an erroneous printk.
* Simply comments for string flag (since we now leave the handling to the
  caller).

v9 (2007-12-07)

* Add back missing MODULE_DEVICE_TABLE for autoloading
* Checkpatch fixes

v10 (2007-12-12)

* Workaround broken GUIDs declared expensive without a WCxx method.
* Minor cleanups

v11 (2007-12-17)

* More fixing for broken GUIDs declared expensive without a WCxx method.
* Add basic EmbeddedControl region handling.

v12 (2007-12-18)

* Changed EC region handling code, as per Alexey's comments.

v13 (2007-12-27)

* Changed event handling so that we can have one event handler registered
  per GUID, as per Matthew Garrett's suggestion.

v14 (2008-01-12)

* Remove ACPI debug statements

v15 (2008-02-01)

* Replace two remaining 'x == NULL' type tests with '!x'

v16 (2008-02-05)

* Change MAINTAINERS entry, as I am not, and never have been, paid to work
  on WMI
* Remove 'default' line from Kconfig

Signed-off-by: Carlos Corbacho <carlos@strangeworlds.co.uk>
CC: Matthew Garrett <mjg59@srcf.ucam.org>
CC: Alexey Starikovskiy <aystarik@gmail.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 MAINTAINERS           |   7 +
 drivers/acpi/Kconfig  |  10 +
 drivers/acpi/Makefile |   1 +
 drivers/acpi/wmi.c    | 710 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h  |  20 ++
 5 files changed, 748 insertions(+)
 create mode 100644 drivers/acpi/wmi.c

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index da30a72a839c..12e26d772e7d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -252,6 +252,13 @@ L:	linux-acpi@vger.kernel.org
 W:	http://acpi.sourceforge.net/
 S:	Supported
 
+ACPI WMI DRIVER
+P:      Carlos Corbacho
+M:      carlos@strangeworlds.co.uk
+L:      linux-acpi@vger.kernel.org
+W:      http://www.lesswatts.org/projects/acpi/
+S:      Maintained
+
 ADM1025 HARDWARE MONITOR DRIVER
 P:	Jean Delvare
 M:	khali@linux-fr.org
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index ccf6ea95f68c..f60cf6fea2bb 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -199,6 +199,16 @@ config ACPI_NUMA
 	depends on (X86 || IA64)
 	default y if IA64_GENERIC || IA64_SGI_SN2
 
+config ACPI_WMI
+	tristate "WMI (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  This driver adds support for the ACPI-WMI mapper device (PNP0C14)
+	  found on some systems.
+
+	  NOTE: You will need another driver or userspace application on top of
+	  this to actually use anything defined in the ACPI-WMI mapper.
+
 config ACPI_ASUS
         tristate "ASUS/Medion Laptop Extras"
 	depends on X86
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 456446f90077..f29812a86533 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_ACPI_THERMAL)	+= thermal.o
 obj-$(CONFIG_ACPI_SYSTEM)	+= system.o event.o
 obj-$(CONFIG_ACPI_DEBUG)	+= debug.o
 obj-$(CONFIG_ACPI_NUMA)		+= numa.o
+obj-$(CONFIG_ACPI_WMI)		+= wmi.o
 obj-$(CONFIG_ACPI_ASUS)		+= asus_acpi.o
 obj-$(CONFIG_ACPI_TOSHIBA)	+= toshiba_acpi.o
 obj-$(CONFIG_ACPI_HOTPLUG_MEMORY)	+= acpi_memhotplug.o
diff --git a/drivers/acpi/wmi.c b/drivers/acpi/wmi.c
new file mode 100644
index 000000000000..36b84ab418dd
--- /dev/null
+++ b/drivers/acpi/wmi.c
@@ -0,0 +1,710 @@
+/*
+ *  ACPI-WMI mapping driver
+ *
+ *  Copyright (C) 2007-2008 Carlos Corbacho <carlos@strangeworlds.co.uk>
+ *
+ *  GUID parsing code from ldm.c is:
+ *   Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
+ *   Copyright (c) 2001-2007 Anton Altaparmakov
+ *   Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+
+ACPI_MODULE_NAME("wmi");
+MODULE_AUTHOR("Carlos Corbacho");
+MODULE_DESCRIPTION("ACPI-WMI Mapping Driver");
+MODULE_LICENSE("GPL");
+
+#define ACPI_WMI_CLASS "wmi"
+
+#undef PREFIX
+#define PREFIX "ACPI: WMI: "
+
+static DEFINE_MUTEX(wmi_data_lock);
+
+struct guid_block {
+	char guid[16];
+	union {
+		char object_id[2];
+		struct {
+			unsigned char notify_id;
+			unsigned char reserved;
+		};
+	};
+	u8 instance_count;
+	u8 flags;
+};
+
+struct wmi_block {
+	struct list_head list;
+	struct guid_block gblock;
+	acpi_handle handle;
+	wmi_notify_handler handler;
+	void *handler_data;
+};
+
+static struct wmi_block wmi_blocks;
+
+/*
+ * If the GUID data block is marked as expensive, we must enable and
+ * explicitily disable data collection.
+ */
+#define ACPI_WMI_EXPENSIVE   0x1
+#define ACPI_WMI_METHOD      0x2	/* GUID is a method */
+#define ACPI_WMI_STRING      0x4	/* GUID takes & returns a string */
+#define ACPI_WMI_EVENT       0x8	/* GUID is an event */
+
+static int acpi_wmi_remove(struct acpi_device *device, int type);
+static int acpi_wmi_add(struct acpi_device *device);
+
+static const struct acpi_device_id wmi_device_ids[] = {
+	{"PNP0C14", 0},
+	{"pnp0c14", 0},
+	{"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, wmi_device_ids);
+
+static struct acpi_driver acpi_wmi_driver = {
+	.name = "wmi",
+	.class = ACPI_WMI_CLASS,
+	.ids = wmi_device_ids,
+	.ops = {
+		.add = acpi_wmi_add,
+		.remove = acpi_wmi_remove,
+		},
+};
+
+/*
+ * GUID parsing functions
+ */
+
+/**
+ * wmi_parse_hexbyte - Convert a ASCII hex number to a byte
+ * @src:  Pointer to at least 2 characters to convert.
+ *
+ * Convert a two character ASCII hex string to a number.
+ *
+ * Return:  0-255  Success, the byte was parsed correctly
+ *          -1     Error, an invalid character was supplied
+ */
+static int wmi_parse_hexbyte(const u8 *src)
+{
+	unsigned int x; /* For correct wrapping */
+	int h;
+
+	/* high part */
+	x = src[0];
+	if (x - '0' <= '9' - '0') {
+		h = x - '0';
+	} else if (x - 'a' <= 'f' - 'a') {
+		h = x - 'a' + 10;
+	} else if (x - 'A' <= 'F' - 'A') {
+		h = x - 'A' + 10;
+	} else {
+		return -1;
+	}
+	h <<= 4;
+
+	/* low part */
+	x = src[1];
+	if (x - '0' <= '9' - '0')
+		return h | (x - '0');
+	if (x - 'a' <= 'f' - 'a')
+		return h | (x - 'a' + 10);
+	if (x - 'A' <= 'F' - 'A')
+		return h | (x - 'A' + 10);
+	return -1;
+}
+
+/**
+ * wmi_swap_bytes - Rearrange GUID bytes to match GUID binary
+ * @src:   Memory block holding binary GUID (16 bytes)
+ * @dest:  Memory block to hold byte swapped binary GUID (16 bytes)
+ *
+ * Byte swap a binary GUID to match it's real GUID value
+ */
+static void wmi_swap_bytes(u8 *src, u8 *dest)
+{
+	int i;
+
+	for (i = 0; i <= 3; i++)
+		memcpy(dest + i, src + (3 - i), 1);
+
+	for (i = 0; i <= 1; i++)
+		memcpy(dest + 4 + i, src + (5 - i), 1);
+
+	for (i = 0; i <= 1; i++)
+		memcpy(dest + 6 + i, src + (7 - i), 1);
+
+	memcpy(dest + 8, src + 8, 8);
+}
+
+/**
+ * wmi_parse_guid - Convert GUID from ASCII to binary
+ * @src:   36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @dest:  Memory block to hold binary GUID (16 bytes)
+ *
+ * N.B. The GUID need not be NULL terminated.
+ *
+ * Return:  'true'   @dest contains binary GUID
+ *          'false'  @dest contents are undefined
+ */
+static bool wmi_parse_guid(const u8 *src, u8 *dest)
+{
+	static const int size[] = { 4, 2, 2, 2, 6 };
+	int i, j, v;
+
+	if (src[8]  != '-' || src[13] != '-' ||
+		src[18] != '-' || src[23] != '-')
+		return false;
+
+	for (j = 0; j < 5; j++, src++) {
+		for (i = 0; i < size[j]; i++, src += 2, *dest++ = v) {
+			v = wmi_parse_hexbyte(src);
+			if (v < 0)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static bool find_guid(const char *guid_string, struct wmi_block **out)
+{
+	char tmp[16], guid_input[16];
+	struct wmi_block *wblock;
+	struct guid_block *block;
+	struct list_head *p;
+
+	wmi_parse_guid(guid_string, tmp);
+	wmi_swap_bytes(tmp, guid_input);
+
+	list_for_each(p, &wmi_blocks.list) {
+		wblock = list_entry(p, struct wmi_block, list);
+		block = &wblock->gblock;
+
+		if (memcmp(block->guid, guid_input, 16) == 0) {
+			if (out)
+				*out = wblock;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Exported WMI functions
+ */
+/**
+ * wmi_evaluate_method - Evaluate a WMI method
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @instance: Instance index
+ * @method_id: Method ID to call
+ * &in: Buffer containing input for the method call
+ * &out: Empty buffer to return the method results
+ *
+ * Call an ACPI-WMI method
+ */
+acpi_status wmi_evaluate_method(const char *guid_string, u8 instance,
+u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out)
+{
+	struct guid_block *block = NULL;
+	struct wmi_block *wblock = NULL;
+	acpi_handle handle;
+	acpi_status status;
+	struct acpi_object_list input;
+	union acpi_object params[3];
+	char method[4] = "WM";
+
+	if (!find_guid(guid_string, &wblock))
+		return AE_BAD_ADDRESS;
+
+	block = &wblock->gblock;
+	handle = wblock->handle;
+
+	if (!block->flags & ACPI_WMI_METHOD)
+		return AE_BAD_DATA;
+
+	if (block->instance_count < instance)
+		return AE_BAD_PARAMETER;
+
+	input.count = 2;
+	input.pointer = params;
+	params[0].type = ACPI_TYPE_INTEGER;
+	params[0].integer.value = instance;
+	params[1].type = ACPI_TYPE_INTEGER;
+	params[1].integer.value = method_id;
+
+	if (in) {
+		input.count = 3;
+
+		if (block->flags & ACPI_WMI_STRING) {
+			params[2].type = ACPI_TYPE_STRING;
+		} else {
+			params[2].type = ACPI_TYPE_BUFFER;
+		}
+		params[2].buffer.length = in->length;
+		params[2].buffer.pointer = in->pointer;
+	}
+
+	strncat(method, block->object_id, 2);
+
+	status = acpi_evaluate_object(handle, method, &input, out);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(wmi_evaluate_method);
+
+/**
+ * wmi_query_block - Return contents of a WMI block
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @instance: Instance index
+ * &out: Empty buffer to return the contents of the data block to
+ *
+ * Return the contents of an ACPI-WMI data block to a buffer
+ */
+acpi_status wmi_query_block(const char *guid_string, u8 instance,
+struct acpi_buffer *out)
+{
+	struct guid_block *block = NULL;
+	struct wmi_block *wblock = NULL;
+	acpi_handle handle;
+	acpi_status status, wc_status = AE_ERROR;
+	struct acpi_object_list input, wc_input;
+	union acpi_object wc_params[1], wq_params[1];
+	char method[4];
+	char wc_method[4] = "WC";
+
+	if (!guid_string || !out)
+		return AE_BAD_PARAMETER;
+
+	if (!find_guid(guid_string, &wblock))
+		return AE_BAD_ADDRESS;
+
+	block = &wblock->gblock;
+	handle = wblock->handle;
+
+	if (block->instance_count < instance)
+		return AE_BAD_PARAMETER;
+
+	/* Check GUID is a data block */
+	if (block->flags & (ACPI_WMI_EVENT | ACPI_WMI_METHOD))
+		return AE_BAD_ADDRESS;
+
+	input.count = 1;
+	input.pointer = wq_params;
+	wq_params[0].type = ACPI_TYPE_INTEGER;
+	wq_params[0].integer.value = instance;
+
+	/*
+	 * If ACPI_WMI_EXPENSIVE, call the relevant WCxx method first to
+	 * enable collection.
+	 */
+	if (block->flags & ACPI_WMI_EXPENSIVE) {
+		wc_input.count = 1;
+		wc_input.pointer = wc_params;
+		wc_params[0].type = ACPI_TYPE_INTEGER;
+		wc_params[0].integer.value = 1;
+
+		strncat(wc_method, block->object_id, 2);
+
+		/*
+		 * Some GUIDs break the specification by declaring themselves
+		 * expensive, but have no corresponding WCxx method. So we
+		 * should not fail if this happens.
+		 */
+		wc_status = acpi_evaluate_object(handle, wc_method,
+			&wc_input, NULL);
+	}
+
+	strcpy(method, "WQ");
+	strncat(method, block->object_id, 2);
+
+	status = acpi_evaluate_object(handle, method, NULL, out);
+
+	/*
+	 * If ACPI_WMI_EXPENSIVE, call the relevant WCxx method, even if
+	 * the WQxx method failed - we should disable collection anyway.
+	 */
+	if ((block->flags & ACPI_WMI_EXPENSIVE) && wc_status) {
+		wc_params[0].integer.value = 0;
+		status = acpi_evaluate_object(handle,
+		wc_method, &wc_input, NULL);
+	}
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(wmi_query_block);
+
+/**
+ * wmi_set_block - Write to a WMI block
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @instance: Instance index
+ * &in: Buffer containing new values for the data block
+ *
+ * Write the contents of the input buffer to an ACPI-WMI data block
+ */
+acpi_status wmi_set_block(const char *guid_string, u8 instance,
+const struct acpi_buffer *in)
+{
+	struct guid_block *block = NULL;
+	struct wmi_block *wblock = NULL;
+	acpi_handle handle;
+	struct acpi_object_list input;
+	union acpi_object params[2];
+	char method[4] = "WS";
+
+	if (!guid_string || !in)
+		return AE_BAD_DATA;
+
+	if (!find_guid(guid_string, &wblock))
+		return AE_BAD_ADDRESS;
+
+	block = &wblock->gblock;
+	handle = wblock->handle;
+
+	if (block->instance_count < instance)
+		return AE_BAD_PARAMETER;
+
+	/* Check GUID is a data block */
+	if (block->flags & (ACPI_WMI_EVENT | ACPI_WMI_METHOD))
+		return AE_BAD_ADDRESS;
+
+	input.count = 2;
+	input.pointer = params;
+	params[0].type = ACPI_TYPE_INTEGER;
+	params[0].integer.value = instance;
+
+	if (block->flags & ACPI_WMI_STRING) {
+		params[1].type = ACPI_TYPE_STRING;
+	} else {
+		params[1].type = ACPI_TYPE_BUFFER;
+	}
+	params[1].buffer.length = in->length;
+	params[1].buffer.pointer = in->pointer;
+
+	strncat(method, block->object_id, 2);
+
+	return acpi_evaluate_object(handle, method, &input, NULL);
+}
+EXPORT_SYMBOL_GPL(wmi_set_block);
+
+/**
+ * wmi_install_notify_handler - Register handler for WMI events
+ * @handler: Function to handle notifications
+ * @data: Data to be returned to handler when event is fired
+ *
+ * Register a handler for events sent to the ACPI-WMI mapper device.
+ */
+acpi_status wmi_install_notify_handler(const char *guid,
+wmi_notify_handler handler, void *data)
+{
+	struct wmi_block *block;
+
+	if (!guid || !handler)
+		return AE_BAD_PARAMETER;
+
+	find_guid(guid, &block);
+	if (!block)
+		return AE_NOT_EXIST;
+
+	if (block->handler)
+		return AE_ALREADY_ACQUIRED;
+
+	block->handler = handler;
+	block->handler_data = data;
+
+	return AE_OK;
+}
+EXPORT_SYMBOL_GPL(wmi_install_notify_handler);
+
+/**
+ * wmi_uninstall_notify_handler - Unregister handler for WMI events
+ *
+ * Unregister handler for events sent to the ACPI-WMI mapper device.
+ */
+acpi_status wmi_remove_notify_handler(const char *guid)
+{
+	struct wmi_block *block;
+
+	if (!guid)
+		return AE_BAD_PARAMETER;
+
+	find_guid(guid, &block);
+	if (!block)
+		return AE_NOT_EXIST;
+
+	if (!block->handler)
+		return AE_NULL_ENTRY;
+
+	block->handler = NULL;
+	block->handler_data = NULL;
+
+	return AE_OK;
+}
+EXPORT_SYMBOL_GPL(wmi_remove_notify_handler);
+
+/**
+ * wmi_get_event_data - Get WMI data associated with an event
+ *
+ * @event - Event to find
+ * &out - Buffer to hold event data
+ *
+ * Returns extra data associated with an event in WMI.
+ */
+acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out)
+{
+	struct acpi_object_list input;
+	union acpi_object params[1];
+	struct guid_block *gblock;
+	struct wmi_block *wblock;
+	struct list_head *p;
+
+	input.count = 1;
+	input.pointer = params;
+	params[0].type = ACPI_TYPE_INTEGER;
+	params[0].integer.value = event;
+
+	list_for_each(p, &wmi_blocks.list) {
+		wblock = list_entry(p, struct wmi_block, list);
+		gblock = &wblock->gblock;
+
+		if ((gblock->flags & ACPI_WMI_EVENT) &&
+			(gblock->notify_id == event))
+			return acpi_evaluate_object(wblock->handle, "_WED",
+				&input, out);
+	}
+
+	return AE_NOT_FOUND;
+}
+EXPORT_SYMBOL_GPL(wmi_get_event_data);
+
+/**
+ * wmi_has_guid - Check if a GUID is available
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ *
+ * Check if a given GUID is defined by _WDG
+ */
+bool wmi_has_guid(const char *guid_string)
+{
+	return find_guid(guid_string, NULL);
+}
+EXPORT_SYMBOL_GPL(wmi_has_guid);
+
+/*
+ * Parse the _WDG method for the GUID data blocks
+ */
+static __init acpi_status parse_wdg(acpi_handle handle)
+{
+	struct acpi_buffer out = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *obj;
+	struct guid_block *gblock;
+	struct wmi_block *wblock;
+	acpi_status status;
+	u32 i, total;
+
+	status = acpi_evaluate_object(handle, "_WDG", NULL, &out);
+
+	if (ACPI_FAILURE(status))
+		return status;
+
+	obj = (union acpi_object *) out.pointer;
+
+	if (obj->type != ACPI_TYPE_BUFFER)
+		return AE_ERROR;
+
+	total = obj->buffer.length / sizeof(struct guid_block);
+
+	gblock = kzalloc(obj->buffer.length, GFP_KERNEL);
+	if (!gblock)
+		return AE_NO_MEMORY;
+
+	memcpy(gblock, obj->buffer.pointer, obj->buffer.length);
+
+	for (i = 0; i < total; i++) {
+		wblock = kzalloc(sizeof(struct wmi_block), GFP_KERNEL);
+		if (!wblock)
+			return AE_NO_MEMORY;
+
+		wblock->gblock = gblock[i];
+		wblock->handle = handle;
+		list_add_tail(&wblock->list, &wmi_blocks.list);
+	}
+
+	kfree(out.pointer);
+	kfree(gblock);
+
+	return status;
+}
+
+/*
+ * WMI can have EmbeddedControl access regions. In which case, we just want to
+ * hand these off to the EC driver.
+ */
+static acpi_status
+acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address,
+		      u32 bits, acpi_integer * value,
+		      void *handler_context, void *region_context)
+{
+	int result = 0, i = 0;
+	u8 temp = 0;
+
+	if ((address > 0xFF) || !value)
+		return AE_BAD_PARAMETER;
+
+	if (function != ACPI_READ && function != ACPI_WRITE)
+		return AE_BAD_PARAMETER;
+
+	if (bits != 8)
+		return AE_BAD_PARAMETER;
+
+	if (function == ACPI_READ) {
+		result = ec_read(address, &temp);
+		(*value) |= ((acpi_integer)temp) << i;
+	} else {
+		temp = 0xff & ((*value) >> i);
+		result = ec_write(address, temp);
+	}
+
+	switch (result) {
+	case -EINVAL:
+		return AE_BAD_PARAMETER;
+		break;
+	case -ENODEV:
+		return AE_NOT_FOUND;
+		break;
+	case -ETIME:
+		return AE_TIME;
+		break;
+	default:
+		return AE_OK;
+	}
+}
+
+static void acpi_wmi_notify(acpi_handle handle, u32 event, void *data)
+{
+	struct guid_block *block;
+	struct wmi_block *wblock;
+	struct list_head *p;
+	struct acpi_device *device = data;
+
+	list_for_each(p, &wmi_blocks.list) {
+		wblock = list_entry(p, struct wmi_block, list);
+		block = &wblock->gblock;
+
+		if ((block->flags & ACPI_WMI_EVENT) &&
+			(block->notify_id == event)) {
+			if (wblock->handler)
+				wblock->handler(event, wblock->handler_data);
+
+			acpi_bus_generate_netlink_event(
+				device->pnp.device_class, device->dev.bus_id,
+				event, 0);
+			break;
+		}
+	}
+}
+
+static int acpi_wmi_remove(struct acpi_device *device, int type)
+{
+	acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+		acpi_wmi_notify);
+
+	acpi_remove_address_space_handler(device->handle,
+				ACPI_ADR_SPACE_EC, &acpi_wmi_ec_space_handler);
+
+	return 0;
+}
+
+static int __init acpi_wmi_add(struct acpi_device *device)
+{
+	acpi_status status;
+	int result = 0;
+
+	status = acpi_install_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+		acpi_wmi_notify, device);
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_ERR PREFIX "Error installing notify handler\n");
+		return -ENODEV;
+	}
+
+	status = acpi_install_address_space_handler(device->handle,
+						    ACPI_ADR_SPACE_EC,
+						    &acpi_wmi_ec_space_handler,
+						    NULL, NULL);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	status = parse_wdg(device->handle);
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_ERR PREFIX "Error installing EC region handler\n");
+		return -ENODEV;
+	}
+
+	return result;
+}
+
+static int __init acpi_wmi_init(void)
+{
+	acpi_status result;
+
+	if (acpi_disabled)
+		return -ENODEV;
+
+	INIT_LIST_HEAD(&wmi_blocks.list);
+
+	result = acpi_bus_register_driver(&acpi_wmi_driver);
+
+	if (result < 0) {
+		printk(KERN_INFO PREFIX "Error loading mapper\n");
+	} else {
+		printk(KERN_INFO PREFIX "Mapper loaded\n");
+	}
+
+	return result;
+}
+
+static void __exit acpi_wmi_exit(void)
+{
+	struct list_head *p, *tmp;
+	struct wmi_block *wblock;
+
+	acpi_bus_unregister_driver(&acpi_wmi_driver);
+
+	list_for_each_safe(p, tmp, &wmi_blocks.list) {
+		wblock = list_entry(p, struct wmi_block, list);
+
+		list_del(p);
+		kfree(wblock);
+	}
+
+	printk(KERN_INFO PREFIX "Mapper unloaded\n");
+}
+
+subsys_initcall(acpi_wmi_init);
+module_exit(acpi_wmi_exit);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 63f2e6ed698f..fab9f70fe39d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -192,6 +192,26 @@ extern int ec_transaction(u8 command,
 
 #endif /*CONFIG_ACPI_EC*/
 
+#if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE)
+
+typedef void (*wmi_notify_handler) (u32 value, void *context);
+
+extern acpi_status wmi_evaluate_method(const char *guid, u8 instance,
+					u32 method_id,
+					const struct acpi_buffer *in,
+					struct acpi_buffer *out);
+extern acpi_status wmi_query_block(const char *guid, u8 instance,
+					struct acpi_buffer *out);
+extern acpi_status wmi_set_block(const char *guid, u8 instance,
+					const struct acpi_buffer *in);
+extern acpi_status wmi_install_notify_handler(const char *guid,
+					wmi_notify_handler handler, void *data);
+extern acpi_status wmi_remove_notify_handler(const char *guid);
+extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out);
+extern bool wmi_has_guid(const char *guid);
+
+#endif	/* CONFIG_ACPI_WMI */
+
 extern int acpi_blacklisted(void);
 #ifdef CONFIG_DMI
 extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
-- 
cgit 


From 181499356e5a9f0bcbd69adc3c6df450f6e2586d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 5 Feb 2008 16:20:22 -0800
Subject: [VLAN]: Constify skb argument to vlan_get_tag()

Required by next patch to use it from the flow classifier.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 34f40efc7607..79504b22a932 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -327,7 +327,7 @@ static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, unsigned short t
  * 
  * Returns error if the skb is not of VLAN type
  */
-static inline int __vlan_get_tag(struct sk_buff *skb, unsigned short *tag)
+static inline int __vlan_get_tag(const struct sk_buff *skb, unsigned short *tag)
 {
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data;
 
@@ -347,7 +347,8 @@ static inline int __vlan_get_tag(struct sk_buff *skb, unsigned short *tag)
  * 
  * Returns error if @skb->cb[] is not set correctly
  */
-static inline int __vlan_hwaccel_get_tag(struct sk_buff *skb, unsigned short *tag)
+static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
+					 unsigned short *tag)
 {
 	struct vlan_skb_tx_cookie *cookie;
 
@@ -370,7 +371,7 @@ static inline int __vlan_hwaccel_get_tag(struct sk_buff *skb, unsigned short *ta
  * 
  * Returns error if the skb is not VLAN tagged
  */
-static inline int vlan_get_tag(struct sk_buff *skb, unsigned short *tag)
+static inline int vlan_get_tag(const struct sk_buff *skb, unsigned short *tag)
 {
 	if (skb->dev->features & NETIF_F_HW_VLAN_TX) {
 		return __vlan_hwaccel_get_tag(skb, tag);
-- 
cgit 


From 9ec138101f8a79007bc571174976a7814ed616f8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 5 Feb 2008 16:21:04 -0800
Subject: [NET_SCHED]: cls_flow: support classification based on VLAN tag

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_cls.h |  1 +
 net/sched/cls_flow.c    | 12 ++++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 40fac8c4559d..28dfc61cf79e 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -348,6 +348,7 @@ enum
 	FLOW_KEY_RTCLASSID,
 	FLOW_KEY_SKUID,
 	FLOW_KEY_SKGID,
+	FLOW_KEY_VLAN_TAG,
 	__FLOW_KEY_MAX,
 };
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index eeb223cf14cf..971b867e0484 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/if_vlan.h>
 
 #include <net/pkt_cls.h>
 #include <net/ip.h>
@@ -270,6 +271,15 @@ static u32 flow_get_skgid(const struct sk_buff *skb)
 	return 0;
 }
 
+static u32 flow_get_vlan_tag(const struct sk_buff *skb)
+{
+	u16 uninitialized_var(tag);
+
+	if (vlan_get_tag(skb, &tag) < 0)
+		return 0;
+	return tag & VLAN_VID_MASK;
+}
+
 static u32 flow_key_get(const struct sk_buff *skb, int key)
 {
 	switch (key) {
@@ -305,6 +315,8 @@ static u32 flow_key_get(const struct sk_buff *skb, int key)
 		return flow_get_skuid(skb);
 	case FLOW_KEY_SKGID:
 		return flow_get_skgid(skb);
+	case FLOW_KEY_VLAN_TAG:
+		return flow_get_vlan_tag(skb);
 	default:
 		WARN_ON(1);
 		return 0;
-- 
cgit 


From 7c2670bbb53820d0a4fab8d74593eeccd1eef225 Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Tue, 22 Jan 2008 18:46:50 +0100
Subject: ACPI: battery: add sysfs serial number

egrep serial /proc/acpi/battery/BAT0/info
serial number:           32090

serial number can tell you from the imminent danger
of beeing set on fire.

Signed-off-by: maximilian attems <max@stro.at>
Acked-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/battery.c             | 5 +++++
 drivers/power/power_supply_sysfs.c | 1 +
 include/linux/power_supply.h       | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index c4a769d1ba85..f6215e809808 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -194,6 +194,9 @@ static int acpi_battery_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_MANUFACTURER:
 		val->strval = battery->oem_info;
 		break;
+	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
+		val->strval = battery->serial_number;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -212,6 +215,7 @@ static enum power_supply_property charge_battery_props[] = {
 	POWER_SUPPLY_PROP_CHARGE_NOW,
 	POWER_SUPPLY_PROP_MODEL_NAME,
 	POWER_SUPPLY_PROP_MANUFACTURER,
+	POWER_SUPPLY_PROP_SERIAL_NUMBER,
 };
 
 static enum power_supply_property energy_battery_props[] = {
@@ -226,6 +230,7 @@ static enum power_supply_property energy_battery_props[] = {
 	POWER_SUPPLY_PROP_ENERGY_NOW,
 	POWER_SUPPLY_PROP_MODEL_NAME,
 	POWER_SUPPLY_PROP_MANUFACTURER,
+	POWER_SUPPLY_PROP_SERIAL_NUMBER,
 };
 #endif
 
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index d4824840c5bf..ad2bed0174d6 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -116,6 +116,7 @@ static struct device_attribute power_supply_attrs[] = {
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_ATTR(model_name),
 	POWER_SUPPLY_ATTR(manufacturer),
+	POWER_SUPPLY_ATTR(serial_number),
 };
 
 static ssize_t power_supply_show_static_attrs(struct device *dev,
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 5cbf3e371012..68ed19ccf1f7 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -94,6 +94,7 @@ enum power_supply_property {
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_PROP_MODEL_NAME,
 	POWER_SUPPLY_PROP_MANUFACTURER,
+	POWER_SUPPLY_PROP_SERIAL_NUMBER,
 };
 
 enum power_supply_type {
-- 
cgit 


From f4eb010706b6c96c136c7aaa9079159743f33fa8 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 26 Oct 2007 16:54:31 +1000
Subject: [POWERPC] Add of_get_next_parent()

Iterating through a device node's parents is simple enough, but dealing
with the refcounts properly is a little ugly, and replicating that logic
is asking for someone to get it wrong or forget it all together, eg:

while (dn != NULL) {
	/* loop body */
	tmp = of_get_parent(dn);
	of_node_put(dn);
	dn = tmp;
}

So add of_get_next_parent(), inspired by of_get_next_child().  The
contract is that it returns the parent and drops the reference on the
current node, this makes the loop look like:

while (dn != NULL) {
	/* loop body */
	dn = of_get_next_parent(dn);
}

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/of/base.c  | 25 +++++++++++++++++++++++++
 include/linux/of.h |  1 +
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index b306fef1ac41..80c9deca5f35 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -137,6 +137,31 @@ struct device_node *of_get_parent(const struct device_node *node)
 }
 EXPORT_SYMBOL(of_get_parent);
 
+/**
+ *	of_get_next_parent - Iterate to a node's parent
+ *	@node:	Node to get parent of
+ *
+ * 	This is like of_get_parent() except that it drops the
+ * 	refcount on the passed node, making it suitable for iterating
+ * 	through a node's parents.
+ *
+ *	Returns a node pointer with refcount incremented, use
+ *	of_node_put() on it when done.
+ */
+struct device_node *of_get_next_parent(struct device_node *node)
+{
+	struct device_node *parent;
+
+	if (!node)
+		return NULL;
+
+	read_lock(&devtree_lock);
+	parent = of_node_get(node->parent);
+	of_node_put(node);
+	read_unlock(&devtree_lock);
+	return parent;
+}
+
 /**
  *	of_get_next_child - Iterate a node childs
  *	@node:	parent node
diff --git a/include/linux/of.h b/include/linux/of.h
index b5f33efcb8e2..6981016dcc25 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -50,6 +50,7 @@ extern struct device_node *of_find_matching_node(struct device_node *from,
 extern struct device_node *of_find_node_by_path(const char *path);
 extern struct device_node *of_find_node_by_phandle(phandle handle);
 extern struct device_node *of_get_parent(const struct device_node *node);
+extern struct device_node *of_get_next_parent(struct device_node *node);
 extern struct device_node *of_get_next_child(const struct device_node *node,
 					     struct device_node *prev);
 #define for_each_child_of_node(parent, child) \
-- 
cgit 


From 0a87e3e92b299e0f1a69b36664ecde2fc296c40a Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Fri, 1 Feb 2008 18:02:30 -0500
Subject: Rename: linux/pata_platform.h to linux/ata_platform.h

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 arch/arm/mach-rpc/riscpc.c                      |  2 +-
 arch/blackfin/mach-bf527/boards/ezkit.c         |  2 +-
 arch/blackfin/mach-bf533/boards/H8606.c         |  2 +-
 arch/blackfin/mach-bf533/boards/cm_bf533.c      |  2 +-
 arch/blackfin/mach-bf533/boards/ezkit.c         |  2 +-
 arch/blackfin/mach-bf533/boards/stamp.c         |  2 +-
 arch/blackfin/mach-bf537/boards/cm_bf537.c      |  2 +-
 arch/blackfin/mach-bf537/boards/generic_board.c |  2 +-
 arch/blackfin/mach-bf537/boards/minotaur.c      |  2 +-
 arch/blackfin/mach-bf537/boards/stamp.c         |  2 +-
 arch/blackfin/mach-bf561/boards/cm_bf561.c      |  2 +-
 arch/blackfin/mach-bf561/boards/ezkit.c         |  2 +-
 arch/sh/boards/landisk/setup.c                  |  2 +-
 arch/sh/boards/lboxre2/setup.c                  |  2 +-
 arch/sh/boards/renesas/r7780rp/setup.c          |  2 +-
 arch/sh/boards/renesas/rts7751r2d/setup.c       |  2 +-
 arch/sh/boards/renesas/sdk7780/setup.c          |  2 +-
 arch/sh/boards/se/7722/setup.c                  |  2 +-
 drivers/ata/pata_of_platform.c                  |  2 +-
 drivers/ata/pata_platform.c                     |  2 +-
 drivers/ide/legacy/ide_platform.c               |  2 +-
 include/linux/ata_platform.h                    | 27 +++++++++++++++++++++++++
 include/linux/pata_platform.h                   | 27 -------------------------
 23 files changed, 48 insertions(+), 48 deletions(-)
 create mode 100644 include/linux/ata_platform.h
 delete mode 100644 include/linux/pata_platform.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c
index a454451c97c3..eca558c6bf5d 100644
--- a/arch/arm/mach-rpc/riscpc.c
+++ b/arch/arm/mach-rpc/riscpc.c
@@ -17,7 +17,7 @@
 #include <linux/sched.h>
 #include <linux/device.h>
 #include <linux/serial_8250.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 
 #include <asm/elf.h>
 #include <asm/io.h>
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index f8c411a24af7..1795aab79064 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -37,7 +37,7 @@
 #if defined(CONFIG_USB_ISP1362_HCD) || defined(CONFIG_USB_ISP1362_HCD_MODULE)
 #include <linux/usb/isp1362.h>
 #endif
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/usb/sl811.h>
diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c
index a72c7a620fa1..97378b0a9753 100644
--- a/arch/blackfin/mach-bf533/boards/H8606.c
+++ b/arch/blackfin/mach-bf533/boards/H8606.c
@@ -38,7 +38,7 @@
 #if defined(CONFIG_USB_ISP1362_HCD) || defined(CONFIG_USB_ISP1362_HCD_MODULE)
 #include <linux/usb/isp1362.h>
 #endif
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 
 #include <asm/dma.h>
diff --git a/arch/blackfin/mach-bf533/boards/cm_bf533.c b/arch/blackfin/mach-bf533/boards/cm_bf533.c
index 21df2f375497..886f260d9359 100644
--- a/arch/blackfin/mach-bf533/boards/cm_bf533.c
+++ b/arch/blackfin/mach-bf533/boards/cm_bf533.c
@@ -34,7 +34,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
 #include <linux/usb/isp1362.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <asm/dma.h>
 #include <asm/bfin5xx_spi.h>
diff --git a/arch/blackfin/mach-bf533/boards/ezkit.c b/arch/blackfin/mach-bf533/boards/ezkit.c
index c37dd45c8803..4026c2f3ab4e 100644
--- a/arch/blackfin/mach-bf533/boards/ezkit.c
+++ b/arch/blackfin/mach-bf533/boards/ezkit.c
@@ -35,7 +35,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
 #include <linux/usb/isp1362.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <asm/dma.h>
 #include <asm/bfin5xx_spi.h>
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index ac52b040b336..0185350feacc 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -38,7 +38,7 @@
 #if defined(CONFIG_USB_ISP1362_HCD) || defined(CONFIG_USB_ISP1362_HCD_MODULE)
 #include <linux/usb/isp1362.h>
 #endif
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <asm/dma.h>
 #include <asm/bfin5xx_spi.h>
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537.c b/arch/blackfin/mach-bf537/boards/cm_bf537.c
index 8703b67d5ec6..f7c1f964f13b 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537.c
@@ -36,7 +36,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
 #include <linux/usb/isp1362.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <asm/dma.h>
 #include <asm/bfin5xx_spi.h>
diff --git a/arch/blackfin/mach-bf537/boards/generic_board.c b/arch/blackfin/mach-bf537/boards/generic_board.c
index 3e52f3f5bd58..8a3397db1d21 100644
--- a/arch/blackfin/mach-bf537/boards/generic_board.c
+++ b/arch/blackfin/mach-bf537/boards/generic_board.c
@@ -38,7 +38,7 @@
 #if defined(CONFIG_USB_ISP1362_HCD) || defined(CONFIG_USB_ISP1362_HCD_MODULE)
 #include <linux/usb/isp1362.h>
 #endif
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/usb/sl811.h>
diff --git a/arch/blackfin/mach-bf537/boards/minotaur.c b/arch/blackfin/mach-bf537/boards/minotaur.c
index b8bbba85af53..d71e0be33921 100644
--- a/arch/blackfin/mach-bf537/boards/minotaur.c
+++ b/arch/blackfin/mach-bf537/boards/minotaur.c
@@ -10,7 +10,7 @@
 #if defined(CONFIG_USB_ISP1362_HCD) || defined(CONFIG_USB_ISP1362_HCD_MODULE)
 #include <linux/usb_isp1362.h>
 #endif
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/usb_sl811.h>
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index 772541548b76..119e6ea83384 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -38,7 +38,7 @@
 #if defined(CONFIG_USB_ISP1362_HCD) || defined(CONFIG_USB_ISP1362_HCD_MODULE)
 #include <linux/usb/isp1362.h>
 #endif
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/usb/sl811.h>
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index 3a79a9061bdc..bf9e738a7c64 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -34,7 +34,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
 #include <linux/usb/isp1362.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/irq.h>
 #include <asm/dma.h>
 #include <asm/bfin5xx_spi.h>
diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c
index 7601c3be1b5c..ed863ce9a2d8 100644
--- a/arch/blackfin/mach-bf561/boards/ezkit.c
+++ b/arch/blackfin/mach-bf561/boards/ezkit.c
@@ -35,7 +35,7 @@
 #include <linux/spi/spi.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <asm/dma.h>
 #include <asm/bfin5xx_spi.h>
 #include <asm/portmux.h>
diff --git a/arch/sh/boards/landisk/setup.c b/arch/sh/boards/landisk/setup.c
index eda71763ecc5..2b708ec72558 100644
--- a/arch/sh/boards/landisk/setup.c
+++ b/arch/sh/boards/landisk/setup.c
@@ -14,7 +14,7 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/pm.h>
 #include <linux/mm.h>
 #include <asm/machvec.h>
diff --git a/arch/sh/boards/lboxre2/setup.c b/arch/sh/boards/lboxre2/setup.c
index 9c830fdc411b..c74440d38ee9 100644
--- a/arch/sh/boards/lboxre2/setup.c
+++ b/arch/sh/boards/lboxre2/setup.c
@@ -13,7 +13,7 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <asm/machvec.h>
 #include <asm/addrspace.h>
 #include <asm/lboxre2.h>
diff --git a/arch/sh/boards/renesas/r7780rp/setup.c b/arch/sh/boards/renesas/r7780rp/setup.c
index a43b47726f54..f7a8d5c9d510 100644
--- a/arch/sh/boards/renesas/r7780rp/setup.c
+++ b/arch/sh/boards/renesas/r7780rp/setup.c
@@ -15,7 +15,7 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/types.h>
 #include <net/ax88796.h>
 #include <asm/machvec.h>
diff --git a/arch/sh/boards/renesas/rts7751r2d/setup.c b/arch/sh/boards/renesas/rts7751r2d/setup.c
index 3452b072adde..a0ef81b7de37 100644
--- a/arch/sh/boards/renesas/rts7751r2d/setup.c
+++ b/arch/sh/boards/renesas/rts7751r2d/setup.c
@@ -10,7 +10,7 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/serial_8250.h>
 #include <linux/sm501.h>
 #include <linux/sm501-regs.h>
diff --git a/arch/sh/boards/renesas/sdk7780/setup.c b/arch/sh/boards/renesas/sdk7780/setup.c
index 5df32f201870..acc5932587f1 100644
--- a/arch/sh/boards/renesas/sdk7780/setup.c
+++ b/arch/sh/boards/renesas/sdk7780/setup.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/platform_device.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <asm/machvec.h>
 #include <asm/sdk7780.h>
 #include <asm/heartbeat.h>
diff --git a/arch/sh/boards/se/7722/setup.c b/arch/sh/boards/se/7722/setup.c
index eb97dca5b736..b1a3d9d0172f 100644
--- a/arch/sh/boards/se/7722/setup.c
+++ b/arch/sh/boards/se/7722/setup.c
@@ -12,7 +12,7 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <asm/machvec.h>
 #include <asm/se7722.h>
 #include <asm/io.h>
diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c
index 938f48a807eb..408da30594c4 100644
--- a/drivers/ata/pata_of_platform.c
+++ b/drivers/ata/pata_of_platform.c
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 
 static int __devinit pata_of_platform_probe(struct of_device *ofdev,
 					    const struct of_device_id *match)
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 224bb6c2030a..aad7adc6ea56 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -19,7 +19,7 @@
 #include <linux/ata.h>
 #include <linux/libata.h>
 #include <linux/platform_device.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 
 #define DRV_NAME "pata_platform"
 #define DRV_VERSION "1.2"
diff --git a/drivers/ide/legacy/ide_platform.c b/drivers/ide/legacy/ide_platform.c
index 26c82ce602de..688fcae17488 100644
--- a/drivers/ide/legacy/ide_platform.c
+++ b/drivers/ide/legacy/ide_platform.c
@@ -17,7 +17,7 @@
 #include <linux/ide.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
-#include <linux/pata_platform.h>
+#include <linux/ata_platform.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
new file mode 100644
index 000000000000..6a7a92db294c
--- /dev/null
+++ b/include/linux/ata_platform.h
@@ -0,0 +1,27 @@
+#ifndef __LINUX_PATA_PLATFORM_H
+#define __LINUX_PATA_PLATFORM_H
+
+struct pata_platform_info {
+	/*
+	 * I/O port shift, for platforms with ports that are
+	 * constantly spaced and need larger than the 1-byte
+	 * spacing used by ata_std_ports().
+	 */
+	unsigned int ioport_shift;
+	/* 
+	 * Indicate platform specific irq types and initial
+	 * IRQ flags when call request_irq()
+	 */
+	unsigned int irq_flags;
+};
+
+extern int __devinit __pata_platform_probe(struct device *dev,
+					   struct resource *io_res,
+					   struct resource *ctl_res,
+					   struct resource *irq_res,
+					   unsigned int ioport_shift,
+					   int __pio_mask);
+
+extern int __devexit __pata_platform_remove(struct device *dev);
+
+#endif /* __LINUX_PATA_PLATFORM_H */
diff --git a/include/linux/pata_platform.h b/include/linux/pata_platform.h
deleted file mode 100644
index 6a7a92db294c..000000000000
--- a/include/linux/pata_platform.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef __LINUX_PATA_PLATFORM_H
-#define __LINUX_PATA_PLATFORM_H
-
-struct pata_platform_info {
-	/*
-	 * I/O port shift, for platforms with ports that are
-	 * constantly spaced and need larger than the 1-byte
-	 * spacing used by ata_std_ports().
-	 */
-	unsigned int ioport_shift;
-	/* 
-	 * Indicate platform specific irq types and initial
-	 * IRQ flags when call request_irq()
-	 */
-	unsigned int irq_flags;
-};
-
-extern int __devinit __pata_platform_probe(struct device *dev,
-					   struct resource *io_res,
-					   struct resource *ctl_res,
-					   struct resource *irq_res,
-					   unsigned int ioport_shift,
-					   int __pio_mask);
-
-extern int __devexit __pata_platform_remove(struct device *dev);
-
-#endif /* __LINUX_PATA_PLATFORM_H */
-- 
cgit 


From f351b2d638c3cb0b95adde3549b7bfaf3f991dfa Mon Sep 17 00:00:00 2001
From: Saeed Bishara <saeed@marvell.com>
Date: Fri, 1 Feb 2008 18:08:03 -0500
Subject: sata_mv: Support SoC controllers

Marvell's Orion SoC includes SATA controllers based on Marvell's
PCI-to-SATA 88SX controllers. This patch extends the libATA sata_mv
driver to support those controllers.

[edited to use linux/ata_platform.h -jg]

Signed-off-by: Saeed Bishara <saeed@marvell.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/sata_mv.c        | 365 +++++++++++++++++++++++++++++++++++++------
 include/linux/ata_platform.h |  13 +-
 2 files changed, 330 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 3c1b5c9027db..1e97a33cd260 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -71,6 +71,8 @@
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/ata_platform.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
@@ -179,6 +181,8 @@ enum {
 
 	HC_MAIN_IRQ_CAUSE_OFS	= 0x1d60,
 	HC_MAIN_IRQ_MASK_OFS	= 0x1d64,
+	HC_SOC_MAIN_IRQ_CAUSE_OFS = 0x20020,
+	HC_SOC_MAIN_IRQ_MASK_OFS = 0x20024,
 	PORT0_ERR		= (1 << 0),	/* shift by port # */
 	PORT0_DONE		= (1 << 1),	/* shift by port # */
 	HC0_IRQ_PEND		= 0x1ff,	/* bits 0-8 = HC0's ports */
@@ -194,11 +198,13 @@ enum {
 	TWSI_INT		= (1 << 24),
 	HC_MAIN_RSVD		= (0x7f << 25),	/* bits 31-25 */
 	HC_MAIN_RSVD_5		= (0x1fff << 19), /* bits 31-19 */
+	HC_MAIN_RSVD_SOC 	= (0x3fffffb << 6),     /* bits 31-9, 7-6 */
 	HC_MAIN_MASKED_IRQS	= (TRAN_LO_DONE | TRAN_HI_DONE |
 				   PORTS_0_7_COAL_DONE | GPIO_INT | TWSI_INT |
 				   HC_MAIN_RSVD),
 	HC_MAIN_MASKED_IRQS_5	= (PORTS_0_3_COAL_DONE | PORTS_4_7_COAL_DONE |
 				   HC_MAIN_RSVD_5),
+	HC_MAIN_MASKED_IRQS_SOC = (PORTS_0_3_COAL_DONE | HC_MAIN_RSVD_SOC),
 
 	/* SATAHC registers */
 	HC_CFG_OFS		= 0,
@@ -368,6 +374,7 @@ enum chip_type {
 	chip_608x,
 	chip_6042,
 	chip_7042,
+	chip_soc,
 };
 
 /* Command ReQuest Block: 32B */
@@ -424,6 +431,10 @@ struct mv_host_priv {
 	u32			hp_flags;
 	struct mv_port_signal	signal[8];
 	const struct mv_hw_ops	*ops;
+	int			n_ports;
+	void __iomem		*base;
+	void __iomem		*main_cause_reg_addr;
+	void __iomem		*main_mask_reg_addr;
 	u32			irq_cause_ofs;
 	u32			irq_mask_ofs;
 	u32			unmask_all_irqs;
@@ -482,6 +493,15 @@ static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
 static int mv6_reset_hc(struct mv_host_priv *hpriv, void __iomem *mmio,
 			unsigned int n_hc);
 static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio);
+static void mv_soc_enable_leds(struct mv_host_priv *hpriv,
+				      void __iomem *mmio);
+static void mv_soc_read_preamp(struct mv_host_priv *hpriv, int idx,
+				      void __iomem *mmio);
+static int mv_soc_reset_hc(struct mv_host_priv *hpriv,
+				  void __iomem *mmio, unsigned int n_hc);
+static void mv_soc_reset_flash(struct mv_host_priv *hpriv,
+				      void __iomem *mmio);
+static void mv_soc_reset_bus(struct ata_host *host, void __iomem *mmio);
 static void mv_reset_pci_bus(struct ata_host *host, void __iomem *mmio);
 static void mv_channel_reset(struct mv_host_priv *hpriv, void __iomem *mmio,
 			     unsigned int port_no);
@@ -661,6 +681,12 @@ static const struct ata_port_info mv_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &mv_iie_ops,
 	},
+	{  /* chip_soc */
+		.flags = MV_COMMON_FLAGS | MV_FLAG_SOC,
+		.pio_mask = 0x1f,      /* pio0-4 */
+		.udma_mask = ATA_UDMA6,
+		.port_ops = &mv_iie_ops,
+	},
 };
 
 static const struct pci_device_id mv_pci_tbl[] = {
@@ -711,6 +737,15 @@ static const struct mv_hw_ops mv6xxx_ops = {
 	.reset_bus		= mv_reset_pci_bus,
 };
 
+static const struct mv_hw_ops mv_soc_ops = {
+	.phy_errata		= mv6_phy_errata,
+	.enable_leds		= mv_soc_enable_leds,
+	.read_preamp		= mv_soc_read_preamp,
+	.reset_hc		= mv_soc_reset_hc,
+	.reset_flash		= mv_soc_reset_flash,
+	.reset_bus		= mv_soc_reset_bus,
+};
+
 /*
  * Functions
  */
@@ -749,9 +784,15 @@ static inline void __iomem *mv_port_base(void __iomem *base, unsigned int port)
 		(mv_hardport_from_port(port) * MV_PORT_REG_SZ);
 }
 
+static inline void __iomem *mv_host_base(struct ata_host *host)
+{
+	struct mv_host_priv *hpriv = host->private_data;
+	return hpriv->base;
+}
+
 static inline void __iomem *mv_ap_base(struct ata_port *ap)
 {
-	return mv_port_base(ap->host->iomap[MV_PRIMARY_BAR], ap->port_no);
+	return mv_port_base(mv_host_base(ap->host), ap->port_no);
 }
 
 static inline int mv_get_hc_count(unsigned long port_flags)
@@ -1649,16 +1690,21 @@ static void mv_intr_edma(struct ata_port *ap)
  */
 static void mv_host_intr(struct ata_host *host, u32 relevant, unsigned int hc)
 {
-	void __iomem *mmio = host->iomap[MV_PRIMARY_BAR];
+	struct mv_host_priv *hpriv = host->private_data;
+	void __iomem *mmio = hpriv->base;
 	void __iomem *hc_mmio = mv_hc_base(mmio, hc);
 	u32 hc_irq_cause;
-	int port, port0;
+	int port, port0, last_port;
 
 	if (hc == 0)
 		port0 = 0;
 	else
 		port0 = MV_PORTS_PER_HC;
 
+	if (HAS_PCI(host))
+		last_port = port0 + MV_PORTS_PER_HC;
+	else
+		last_port = port0 + hpriv->n_ports;
 	/* we'll need the HC success int register in most cases */
 	hc_irq_cause = readl(hc_mmio + HC_IRQ_CAUSE_OFS);
 	if (!hc_irq_cause)
@@ -1669,7 +1715,7 @@ static void mv_host_intr(struct ata_host *host, u32 relevant, unsigned int hc)
 	VPRINTK("ENTER, hc%u relevant=0x%08x HC IRQ cause=0x%08x\n",
 		hc, relevant, hc_irq_cause);
 
-	for (port = port0; port < port0 + MV_PORTS_PER_HC; port++) {
+	for (port = port0; port < port0 + last_port; port++) {
 		struct ata_port *ap = host->ports[port];
 		struct mv_port_priv *pp = ap->private_data;
 		int have_err_bits, hard_port, shift;
@@ -1764,13 +1810,15 @@ static void mv_pci_error(struct ata_host *host, void __iomem *mmio)
 static irqreturn_t mv_interrupt(int irq, void *dev_instance)
 {
 	struct ata_host *host = dev_instance;
+	struct mv_host_priv *hpriv = host->private_data;
 	unsigned int hc, handled = 0, n_hcs;
-	void __iomem *mmio = host->iomap[MV_PRIMARY_BAR];
+	void __iomem *mmio = hpriv->base;
 	u32 irq_stat, irq_mask;
 
 	spin_lock(&host->lock);
-	irq_stat = readl(mmio + HC_MAIN_IRQ_CAUSE_OFS);
-	irq_mask = readl(mmio + HC_MAIN_IRQ_MASK_OFS);
+
+	irq_stat = readl(hpriv->main_cause_reg_addr);
+	irq_mask = readl(hpriv->main_mask_reg_addr);
 
 	/* check the cases where we either have nothing pending or have read
 	 * a bogus register value which can indicate HW removal or PCI fault
@@ -1827,7 +1875,8 @@ static unsigned int mv5_scr_offset(unsigned int sc_reg_in)
 
 static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
 {
-	void __iomem *mmio = ap->host->iomap[MV_PRIMARY_BAR];
+	struct mv_host_priv *hpriv = ap->host->private_data;
+	void __iomem *mmio = hpriv->base;
 	void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
 	unsigned int ofs = mv5_scr_offset(sc_reg_in);
 
@@ -1840,7 +1889,8 @@ static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
 
 static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val)
 {
-	void __iomem *mmio = ap->host->iomap[MV_PRIMARY_BAR];
+	struct mv_host_priv *hpriv = ap->host->private_data;
+	void __iomem *mmio = hpriv->base;
 	void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
 	unsigned int ofs = mv5_scr_offset(sc_reg_in);
 
@@ -2178,6 +2228,93 @@ static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
 	writel(m2, port_mmio + PHY_MODE2);
 }
 
+/* TODO: use the generic LED interface to configure the SATA Presence */
+/* & Acitivy LEDs on the board */
+static void mv_soc_enable_leds(struct mv_host_priv *hpriv,
+				      void __iomem *mmio)
+{
+	return;
+}
+
+static void mv_soc_read_preamp(struct mv_host_priv *hpriv, int idx,
+			   void __iomem *mmio)
+{
+	void __iomem *port_mmio;
+	u32 tmp;
+
+	port_mmio = mv_port_base(mmio, idx);
+	tmp = readl(port_mmio + PHY_MODE2);
+
+	hpriv->signal[idx].amps = tmp & 0x700;	/* bits 10:8 */
+	hpriv->signal[idx].pre = tmp & 0xe0;	/* bits 7:5 */
+}
+
+#undef ZERO
+#define ZERO(reg) writel(0, port_mmio + (reg))
+static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv,
+					void __iomem *mmio, unsigned int port)
+{
+	void __iomem *port_mmio = mv_port_base(mmio, port);
+
+	writelfl(EDMA_DS, port_mmio + EDMA_CMD_OFS);
+
+	mv_channel_reset(hpriv, mmio, port);
+
+	ZERO(0x028);		/* command */
+	writel(0x101f, port_mmio + EDMA_CFG_OFS);
+	ZERO(0x004);		/* timer */
+	ZERO(0x008);		/* irq err cause */
+	ZERO(0x00c);		/* irq err mask */
+	ZERO(0x010);		/* rq bah */
+	ZERO(0x014);		/* rq inp */
+	ZERO(0x018);		/* rq outp */
+	ZERO(0x01c);		/* respq bah */
+	ZERO(0x024);		/* respq outp */
+	ZERO(0x020);		/* respq inp */
+	ZERO(0x02c);		/* test control */
+	writel(0xbc, port_mmio + EDMA_IORDY_TMOUT);
+}
+
+#undef ZERO
+
+#define ZERO(reg) writel(0, hc_mmio + (reg))
+static void mv_soc_reset_one_hc(struct mv_host_priv *hpriv,
+				       void __iomem *mmio)
+{
+	void __iomem *hc_mmio = mv_hc_base(mmio, 0);
+
+	ZERO(0x00c);
+	ZERO(0x010);
+	ZERO(0x014);
+
+}
+
+#undef ZERO
+
+static int mv_soc_reset_hc(struct mv_host_priv *hpriv,
+				  void __iomem *mmio, unsigned int n_hc)
+{
+	unsigned int port;
+
+	for (port = 0; port < hpriv->n_ports; port++)
+		mv_soc_reset_hc_port(hpriv, mmio, port);
+
+	mv_soc_reset_one_hc(hpriv, mmio);
+
+	return 0;
+}
+
+static void mv_soc_reset_flash(struct mv_host_priv *hpriv,
+				      void __iomem *mmio)
+{
+	return;
+}
+
+static void mv_soc_reset_bus(struct ata_host *host, void __iomem *mmio)
+{
+	return;
+}
+
 static void mv_channel_reset(struct mv_host_priv *hpriv, void __iomem *mmio,
 			     unsigned int port_no)
 {
@@ -2342,7 +2479,7 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class,
 {
 	struct ata_port *ap = link->ap;
 	struct mv_host_priv *hpriv = ap->host->private_data;
-	void __iomem *mmio = ap->host->iomap[MV_PRIMARY_BAR];
+	void __iomem *mmio = hpriv->base;
 
 	mv_stop_dma(ap);
 
@@ -2383,7 +2520,7 @@ static void mv_error_handler(struct ata_port *ap)
 
 static void mv_eh_freeze(struct ata_port *ap)
 {
-	void __iomem *mmio = ap->host->iomap[MV_PRIMARY_BAR];
+	struct mv_host_priv *hpriv = ap->host->private_data;
 	unsigned int hc = (ap->port_no > 3) ? 1 : 0;
 	u32 tmp, mask;
 	unsigned int shift;
@@ -2397,13 +2534,14 @@ static void mv_eh_freeze(struct ata_port *ap)
 	mask = 0x3 << shift;
 
 	/* disable assertion of portN err, done events */
-	tmp = readl(mmio + HC_MAIN_IRQ_MASK_OFS);
-	writelfl(tmp & ~mask, mmio + HC_MAIN_IRQ_MASK_OFS);
+	tmp = readl(hpriv->main_mask_reg_addr);
+	writelfl(tmp & ~mask, hpriv->main_mask_reg_addr);
 }
 
 static void mv_eh_thaw(struct ata_port *ap)
 {
-	void __iomem *mmio = ap->host->iomap[MV_PRIMARY_BAR];
+	struct mv_host_priv *hpriv = ap->host->private_data;
+	void __iomem *mmio = hpriv->base;
 	unsigned int hc = (ap->port_no > 3) ? 1 : 0;
 	void __iomem *hc_mmio = mv_hc_base(mmio, hc);
 	void __iomem *port_mmio = mv_ap_base(ap);
@@ -2430,8 +2568,8 @@ static void mv_eh_thaw(struct ata_port *ap)
 	writel(hc_irq_cause, hc_mmio + HC_IRQ_CAUSE_OFS);
 
 	/* enable assertion of portN err, done events */
-	tmp = readl(mmio + HC_MAIN_IRQ_MASK_OFS);
-	writelfl(tmp | mask, mmio + HC_MAIN_IRQ_MASK_OFS);
+	tmp = readl(hpriv->main_mask_reg_addr);
+	writelfl(tmp | mask, hpriv->main_mask_reg_addr);
 }
 
 /**
@@ -2598,9 +2736,13 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
 			break;
 		}
 		break;
+	case chip_soc:
+		hpriv->ops = &mv_soc_ops;
+		hp_flags |= MV_HP_ERRATA_60X1C0;
+		break;
 
 	default:
-		dev_printk(KERN_ERR, &pdev->dev,
+		dev_printk(KERN_ERR, host->dev,
 			   "BUG: invalid board index %u\n", board_idx);
 		return 1;
 	}
@@ -2633,15 +2775,25 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
 static int mv_init_host(struct ata_host *host, unsigned int board_idx)
 {
 	int rc = 0, n_hc, port, hc;
-	void __iomem *mmio = host->iomap[MV_PRIMARY_BAR];
 	struct mv_host_priv *hpriv = host->private_data;
-
-	/* global interrupt mask */
-	writel(0, mmio + HC_MAIN_IRQ_MASK_OFS);
+	void __iomem *mmio = hpriv->base;
 
 	rc = mv_chip_id(host, board_idx);
 	if (rc)
-		goto done;
+	goto done;
+
+	if (HAS_PCI(host)) {
+		hpriv->main_cause_reg_addr = hpriv->base +
+		  HC_MAIN_IRQ_CAUSE_OFS;
+		hpriv->main_mask_reg_addr = hpriv->base + HC_MAIN_IRQ_MASK_OFS;
+	} else {
+		hpriv->main_cause_reg_addr = hpriv->base +
+		  HC_SOC_MAIN_IRQ_CAUSE_OFS;
+		hpriv->main_mask_reg_addr = hpriv->base +
+		  HC_SOC_MAIN_IRQ_MASK_OFS;
+	}
+	/* global interrupt mask */
+	writel(0, hpriv->main_mask_reg_addr);
 
 	n_hc = mv_get_hc_count(host->ports[0]->flags);
 
@@ -2672,13 +2824,15 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx)
 	for (port = 0; port < host->n_ports; port++) {
 		struct ata_port *ap = host->ports[port];
 		void __iomem *port_mmio = mv_port_base(mmio, port);
-		unsigned int offset = port_mmio - mmio;
 
 		mv_port_init(&ap->ioaddr, port_mmio);
 
 #ifdef CONFIG_PCI
-		ata_port_pbar_desc(ap, MV_PRIMARY_BAR, -1, "mmio");
-		ata_port_pbar_desc(ap, MV_PRIMARY_BAR, offset, "port");
+		if (HAS_PCI(host)) {
+			unsigned int offset = port_mmio - mmio;
+			ata_port_pbar_desc(ap, MV_PRIMARY_BAR, -1, "mmio");
+			ata_port_pbar_desc(ap, MV_PRIMARY_BAR, offset, "port");
+		}
 #endif
 	}
 
@@ -2694,35 +2848,141 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx)
 		writelfl(0, hc_mmio + HC_IRQ_CAUSE_OFS);
 	}
 
-	/* Clear any currently outstanding host interrupt conditions */
-	writelfl(0, mmio + hpriv->irq_cause_ofs);
+	if (HAS_PCI(host)) {
+		/* Clear any currently outstanding host interrupt conditions */
+		writelfl(0, mmio + hpriv->irq_cause_ofs);
 
-	/* and unmask interrupt generation for host regs */
-	writelfl(hpriv->unmask_all_irqs, mmio + hpriv->irq_mask_ofs);
+		/* and unmask interrupt generation for host regs */
+		writelfl(hpriv->unmask_all_irqs, mmio + hpriv->irq_mask_ofs);
+		if (IS_GEN_I(hpriv))
+			writelfl(~HC_MAIN_MASKED_IRQS_5,
+				 hpriv->main_mask_reg_addr);
+		else
+			writelfl(~HC_MAIN_MASKED_IRQS,
+				 hpriv->main_mask_reg_addr);
+
+		VPRINTK("HC MAIN IRQ cause/mask=0x%08x/0x%08x "
+			"PCI int cause/mask=0x%08x/0x%08x\n",
+			readl(hpriv->main_cause_reg_addr),
+			readl(hpriv->main_mask_reg_addr),
+			readl(mmio + hpriv->irq_cause_ofs),
+			readl(mmio + hpriv->irq_mask_ofs));
+	} else {
+		writelfl(~HC_MAIN_MASKED_IRQS_SOC,
+			 hpriv->main_mask_reg_addr);
+		VPRINTK("HC MAIN IRQ cause/mask=0x%08x/0x%08x\n",
+			readl(hpriv->main_cause_reg_addr),
+			readl(hpriv->main_mask_reg_addr));
+	}
+done:
+	return rc;
+}
 
-	if (IS_GEN_I(hpriv))
-		writelfl(~HC_MAIN_MASKED_IRQS_5, mmio + HC_MAIN_IRQ_MASK_OFS);
-	else
-		writelfl(~HC_MAIN_MASKED_IRQS, mmio + HC_MAIN_IRQ_MASK_OFS);
+/**
+ *      mv_platform_probe - handle a positive probe of an soc Marvell
+ *      host
+ *      @pdev: platform device found
+ *
+ *      LOCKING:
+ *      Inherited from caller.
+ */
+static int mv_platform_probe(struct platform_device *pdev)
+{
+	static int printed_version;
+	const struct mv_sata_platform_data *mv_platform_data;
+	const struct ata_port_info *ppi[] =
+	    { &mv_port_info[chip_soc], NULL };
+	struct ata_host *host;
+	struct mv_host_priv *hpriv;
+	struct resource *res;
+	int n_ports, rc;
 
-	VPRINTK("HC MAIN IRQ cause/mask=0x%08x/0x%08x "
-		"PCI int cause/mask=0x%08x/0x%08x\n",
-		readl(mmio + HC_MAIN_IRQ_CAUSE_OFS),
-		readl(mmio + HC_MAIN_IRQ_MASK_OFS),
-		readl(mmio + hpriv->irq_cause_ofs),
-		readl(mmio + hpriv->irq_mask_ofs));
+	if (!printed_version++)
+		dev_printk(KERN_INFO, &pdev->dev, "version " DRV_VERSION "\n");
 
-done:
-	return rc;
+	/*
+	 * Simple resource validation ..
+	 */
+	if (unlikely(pdev->num_resources != 2)) {
+		dev_err(&pdev->dev, "invalid number of resources\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Get the register base first
+	 */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL)
+		return -EINVAL;
+
+	/* allocate host */
+	mv_platform_data = pdev->dev.platform_data;
+	n_ports = mv_platform_data->n_ports;
+
+	host = ata_host_alloc_pinfo(&pdev->dev, ppi, n_ports);
+	hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL);
+
+	if (!host || !hpriv)
+		return -ENOMEM;
+	host->private_data = hpriv;
+	hpriv->n_ports = n_ports;
+
+	host->iomap = NULL;
+	hpriv->base = ioremap(res->start, res->end - res->start + 1);
+	hpriv->base -= MV_SATAHC0_REG_BASE;
+
+	/* initialize adapter */
+	rc = mv_init_host(host, chip_soc);
+	if (rc)
+		return rc;
+
+	dev_printk(KERN_INFO, &pdev->dev,
+		   "slots %u ports %d\n", (unsigned)MV_MAX_Q_DEPTH,
+		   host->n_ports);
+
+	return ata_host_activate(host, platform_get_irq(pdev, 0), mv_interrupt,
+				 IRQF_SHARED, &mv6_sht);
+}
+
+/*
+ *
+ *      mv_platform_remove    -       unplug a platform interface
+ *      @pdev: platform device
+ *
+ *      A platform bus SATA device has been unplugged. Perform the needed
+ *      cleanup. Also called on module unload for any active devices.
+ */
+static int __devexit mv_platform_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ata_host *host = dev_get_drvdata(dev);
+	struct mv_host_priv *hpriv = host->private_data;
+	void __iomem *base = hpriv->base;
+
+	ata_host_detach(host);
+	iounmap(base);
+	return 0;
 }
 
+static struct platform_driver mv_platform_driver = {
+	.probe			= mv_platform_probe,
+	.remove			= __devexit_p(mv_platform_remove),
+	.driver			= {
+				   .name = DRV_NAME,
+				   .owner = THIS_MODULE,
+				  },
+};
+
+
 #ifdef CONFIG_PCI
-static int mv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
+static int mv_pci_init_one(struct pci_dev *pdev,
+			   const struct pci_device_id *ent);
+
 
 static struct pci_driver mv_pci_driver = {
 	.name			= DRV_NAME,
 	.id_table		= mv_pci_tbl,
-	.probe			= mv_init_one,
+	.probe			= mv_pci_init_one,
 	.remove			= ata_pci_remove_one,
 };
 
@@ -2828,14 +3088,15 @@ static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev)
 }
 
 /**
- *      mv_init_one - handle a positive probe of a Marvell host
+ *      mv_pci_init_one - handle a positive probe of a PCI Marvell host
  *      @pdev: PCI device found
  *      @ent: PCI device ID entry for the matched host
  *
  *      LOCKING:
  *      Inherited from caller.
  */
-static int mv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int mv_pci_init_one(struct pci_dev *pdev,
+			   const struct pci_device_id *ent)
 {
 	static int printed_version;
 	unsigned int board_idx = (unsigned int)ent->driver_data;
@@ -2855,6 +3116,7 @@ static int mv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!host || !hpriv)
 		return -ENOMEM;
 	host->private_data = hpriv;
+	hpriv->n_ports = n_ports;
 
 	/* acquire resources */
 	rc = pcim_enable_device(pdev);
@@ -2867,6 +3129,7 @@ static int mv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		return rc;
 	host->iomap = pcim_iomap_table(pdev);
+	hpriv->base = host->iomap[MV_PRIMARY_BAR];
 
 	rc = pci_go_64(pdev);
 	if (rc)
@@ -2895,11 +3158,22 @@ static int mv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 }
 #endif
 
+static int mv_platform_probe(struct platform_device *pdev);
+static int __devexit mv_platform_remove(struct platform_device *pdev);
+
 static int __init mv_init(void)
 {
 	int rc = -ENODEV;
 #ifdef CONFIG_PCI
 	rc = pci_register_driver(&mv_pci_driver);
+	if (rc < 0)
+		return rc;
+#endif
+	rc = platform_driver_register(&mv_platform_driver);
+
+#ifdef CONFIG_PCI
+	if (rc < 0)
+		pci_unregister_driver(&mv_pci_driver);
 #endif
 	return rc;
 }
@@ -2909,6 +3183,7 @@ static void __exit mv_exit(void)
 #ifdef CONFIG_PCI
 	pci_unregister_driver(&mv_pci_driver);
 #endif
+	platform_driver_unregister(&mv_platform_driver);
 }
 
 MODULE_AUTHOR("Brett Russ");
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index 6a7a92db294c..b856a2a590d9 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -1,5 +1,5 @@
-#ifndef __LINUX_PATA_PLATFORM_H
-#define __LINUX_PATA_PLATFORM_H
+#ifndef __LINUX_ATA_PLATFORM_H
+#define __LINUX_ATA_PLATFORM_H
 
 struct pata_platform_info {
 	/*
@@ -24,4 +24,11 @@ extern int __devinit __pata_platform_probe(struct device *dev,
 
 extern int __devexit __pata_platform_remove(struct device *dev);
 
-#endif /* __LINUX_PATA_PLATFORM_H */
+/*
+ * Marvell SATA private data
+ */
+struct mv_sata_platform_data {
+	int	n_ports; /* number of sata ports */
+};
+
+#endif /* __LINUX_ATA_PLATFORM_H */
-- 
cgit 


From 37198e3051b63d3184886e9bb8235e7578e82628 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Tue, 5 Feb 2008 14:06:27 +0900
Subject: libata: kill now unused n_iter and fix sata_fsl

qc->n_iter was used for libata's own sg walking before sg chaining
replaced it.  During conversion, the field and its usage in sata_fsl
were left behind.  Kill the filed and update sata_fsl.

tj: This was part of James's libata-use-block-layer-padding patch.
    Separated out by me.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Tejun Heo <htejun@gmail.com>
Cc: Li Yang <leoli@freescale.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/ata/sata_fsl.c | 4 ++--
 include/linux/libata.h | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 922d7b2efba8..efcb66b6ccef 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -355,8 +355,8 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
 			ata_port_printk(qc->ap, KERN_ERR,
 					"s/g len unaligned : 0x%x\n", sg_len);
 
-		if ((num_prde == (SATA_FSL_MAX_PRD_DIRECT - 1)) &&
-		    (qc->n_iter + 1 != qc->n_elem)) {
+		if (num_prde == (SATA_FSL_MAX_PRD_DIRECT - 1) &&
+		    sg_next(sg) != NULL) {
 			VPRINTK("setting indirect prde\n");
 			prd_ptr_to_indirect_ext = prd;
 			prd->dba = cpu_to_le32(indirect_ext_segment_paddr);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 4374c4277780..bc5a8d0c7090 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -457,7 +457,6 @@ struct ata_queued_cmd {
 	unsigned long		flags;		/* ATA_QCFLAG_xxx */
 	unsigned int		tag;
 	unsigned int		n_elem;
-	unsigned int		n_iter;
 	unsigned int		mapped_n_elem;
 
 	int			dma_dir;
@@ -1367,7 +1366,6 @@ static inline void ata_qc_reinit(struct ata_queued_cmd *qc)
 	qc->nbytes = qc->raw_nbytes = qc->curbytes = 0;
 	qc->n_elem = 0;
 	qc->mapped_n_elem = 0;
-	qc->n_iter = 0;
 	qc->err_mask = 0;
 	qc->pad_len = 0;
 	qc->last_sg = NULL;
-- 
cgit 


From d909b347591a23c5a2c324fbccd4c9c966f31c67 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 2 Feb 2008 19:30:14 -0700
Subject: async_tx: kill ASYNC_TX_ASSUME_COHERENT

Remove the unused ASYNC_TX_ASSUME_COHERENT flag.  Async_tx is
meant to hide the difference between asynchronous hardware and synchronous
software operations, this flag requires clients to understand cache
coherency consequences of the async path.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
---
 crypto/async_tx/async_memcpy.c | 15 +++++----------
 crypto/async_tx/async_memset.c |  8 +++-----
 crypto/async_tx/async_xor.c    | 22 ++++++----------------
 include/linux/async_tx.h       |  2 --
 4 files changed, 14 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 047e533fcc5b..e8c8956ef1dd 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -35,7 +35,7 @@
  * @src: src page
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
+ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
  * @depend_tx: memcpy depends on the result of this transaction
  * @cb_fn: function to call when the memcpy completes
  * @cb_param: parameter to pass to the callback routine
@@ -55,20 +55,15 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
 	if (tx) { /* run the memcpy asynchronously */
 		dma_addr_t addr;
-		enum dma_data_direction dir;
 
 		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
 
-		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
-			DMA_NONE : DMA_FROM_DEVICE;
-
-		addr = dma_map_page(device->dev, dest, dest_offset, len, dir);
+		addr = dma_map_page(device->dev, dest, dest_offset, len,
+				    DMA_FROM_DEVICE);
 		tx->tx_set_dest(addr, tx, 0);
 
-		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
-			DMA_NONE : DMA_TO_DEVICE;
-
-		addr = dma_map_page(device->dev, src, src_offset, len, dir);
+		addr = dma_map_page(device->dev, src, src_offset, len,
+				    DMA_TO_DEVICE);
 		tx->tx_set_src(addr, tx, 0);
 
 		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 66ef6351202e..760972803958 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,7 +35,7 @@
  * @val: fill value
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
  * @depend_tx: memset depends on the result of this transaction
  * @cb_fn: function to call when the memcpy completes
  * @cb_param: parameter to pass to the callback routine
@@ -55,13 +55,11 @@ async_memset(struct page *dest, int val, unsigned int offset,
 
 	if (tx) { /* run the memset asynchronously */
 		dma_addr_t dma_addr;
-		enum dma_data_direction dir;
 
 		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
-		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
-			DMA_NONE : DMA_FROM_DEVICE;
 
-		dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
+		dma_addr = dma_map_page(device->dev, dest, offset, len,
+					DMA_FROM_DEVICE);
 		tx->tx_set_dest(dma_addr, tx, 0);
 
 		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 716885a87f07..cb41e6bbbc4d 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -42,23 +42,17 @@ do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
 	dma_async_tx_callback cb_fn, void *cb_param)
 {
 	dma_addr_t dma_addr;
-	enum dma_data_direction dir;
 	int i;
 
 	pr_debug("%s: len: %zu\n", __FUNCTION__, len);
 
-	dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
-		DMA_NONE : DMA_FROM_DEVICE;
-
-	dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
+	dma_addr = dma_map_page(device->dev, dest, offset, len,
+				DMA_FROM_DEVICE);
 	tx->tx_set_dest(dma_addr, tx, 0);
 
-	dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
-		DMA_NONE : DMA_TO_DEVICE;
-
 	for (i = 0; i < src_cnt; i++) {
 		dma_addr = dma_map_page(device->dev, src_list[i],
-			offset, len, dir);
+			offset, len, DMA_TO_DEVICE);
 		tx->tx_set_src(dma_addr, tx, i);
 	}
 
@@ -106,7 +100,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
  * @src_cnt: number of source pages
  * @len: length in bytes
  * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
- *	ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
  * @depend_tx: xor depends on the result of this transaction.
  * @cb_fn: function to call when the xor completes
  * @cb_param: parameter to pass to the callback routine
@@ -246,7 +240,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
  * @src_cnt: number of source pages
  * @len: length in bytes
  * @result: 0 if sum == 0 else non-zero
- * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
+ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
  * @depend_tx: xor depends on the result of this transaction.
  * @cb_fn: function to call when the xor completes
  * @cb_param: parameter to pass to the callback routine
@@ -270,16 +264,12 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
 
 	if (tx) {
 		dma_addr_t dma_addr;
-		enum dma_data_direction dir;
 
 		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
 
-		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
-			DMA_NONE : DMA_TO_DEVICE;
-
 		for (i = 0; i < src_cnt; i++) {
 			dma_addr = dma_map_page(device->dev, src_list[i],
-				offset, len, dir);
+				offset, len, DMA_TO_DEVICE);
 			tx->tx_set_src(dma_addr, tx, i);
 		}
 
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index bdca3f1b3213..3d59d371dd32 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -47,7 +47,6 @@ struct dma_chan_ref {
  * address is an implied source, whereas the asynchronous case it must be listed
  * as a source.  The destination address must be the first address in the source
  * array.
- * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations
  * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
  * dependency chain
  * @ASYNC_TX_DEP_ACK: ack the dependency descriptor.  Useful for chaining.
@@ -55,7 +54,6 @@ struct dma_chan_ref {
 enum async_tx_flags {
 	ASYNC_TX_XOR_ZERO_DST	 = (1 << 0),
 	ASYNC_TX_XOR_DROP_DST	 = (1 << 1),
-	ASYNC_TX_ASSUME_COHERENT = (1 << 2),
 	ASYNC_TX_ACK		 = (1 << 3),
 	ASYNC_TX_DEP_ACK	 = (1 << 4),
 };
-- 
cgit 


From 0036731c88fdb5bf4f04a796a30b5e445fc57f54 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 2 Feb 2008 19:49:57 -0700
Subject: async_tx: kill tx_set_src and tx_set_dest methods

The tx_set_src and tx_set_dest methods were originally implemented to allow
an array of addresses to be passed down from async_xor to the dmaengine
driver while minimizing stack overhead.  Removing these methods allows
drivers to have all transaction parameters available at 'prep' time, saves
two function pointers in struct dma_async_tx_descriptor, and reduces the
number of indirect branches..

A consequence of moving this data to the 'prep' routine is that
multi-source routines like async_xor need temporary storage to convert an
array of linear addresses into an array of dma addresses.  In order to keep
the same stack footprint of the previous implementation the input array is
reused as storage for the dma addresses.  This requires that
sizeof(dma_addr_t) be less than or equal to sizeof(void *).  As a
consequence CONFIG_DMADEVICES now depends on !CONFIG_HIGHMEM64G.  It also
requires that drivers be able to make descriptor resources available when
the 'prep' routine is polled.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@intel.com>
---
 crypto/async_tx/async_memcpy.c |  27 +++++----
 crypto/async_tx/async_memset.c |  20 +++----
 crypto/async_tx/async_xor.c    |  94 ++++++++++++++++++++-----------
 drivers/dma/Kconfig            |   1 +
 drivers/dma/dmaengine.c        |  49 +++++++++-------
 drivers/dma/ioat_dma.c         |  39 +++++--------
 drivers/dma/iop-adma.c         | 124 ++++++++++++++---------------------------
 include/linux/dmaengine.h      |  20 +++----
 8 files changed, 178 insertions(+), 196 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index e8c8956ef1dd..faca0bc52068 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -48,26 +48,25 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 {
 	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
 	struct dma_device *device = chan ? chan->device : NULL;
-	int int_en = cb_fn ? 1 : 0;
-	struct dma_async_tx_descriptor *tx = device ?
-		device->device_prep_dma_memcpy(chan, len,
-		int_en) : NULL;
+	struct dma_async_tx_descriptor *tx = NULL;
 
-	if (tx) { /* run the memcpy asynchronously */
-		dma_addr_t addr;
+	if (device) {
+		dma_addr_t dma_dest, dma_src;
 
-		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
+		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
+					DMA_FROM_DEVICE);
 
-		addr = dma_map_page(device->dev, dest, dest_offset, len,
-				    DMA_FROM_DEVICE);
-		tx->tx_set_dest(addr, tx, 0);
+		dma_src = dma_map_page(device->dev, src, src_offset, len,
+				       DMA_TO_DEVICE);
 
-		addr = dma_map_page(device->dev, src, src_offset, len,
-				    DMA_TO_DEVICE);
-		tx->tx_set_src(addr, tx, 0);
+		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
+						    len, cb_fn != NULL);
+	}
 
+	if (tx) {
+		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
 		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
-	} else { /* run the memcpy synchronously */
+	} else {
 		void *dest_buf, *src_buf;
 		pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
 
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 760972803958..0c94851cfd37 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -48,20 +48,20 @@ async_memset(struct page *dest, int val, unsigned int offset,
 {
 	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
 	struct dma_device *device = chan ? chan->device : NULL;
-	int int_en = cb_fn ? 1 : 0;
-	struct dma_async_tx_descriptor *tx = device ?
-		device->device_prep_dma_memset(chan, val, len,
-			int_en) : NULL;
+	struct dma_async_tx_descriptor *tx = NULL;
 
-	if (tx) { /* run the memset asynchronously */
-		dma_addr_t dma_addr;
+	if (device) {
+		dma_addr_t dma_dest;
 
-		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
-
-		dma_addr = dma_map_page(device->dev, dest, offset, len,
+		dma_dest = dma_map_page(device->dev, dest, offset, len,
 					DMA_FROM_DEVICE);
-		tx->tx_set_dest(dma_addr, tx, 0);
 
+		tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
+						    cb_fn != NULL);
+	}
+
+	if (tx) {
+		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
 		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
 	} else { /* run the memset synchronously */
 		void *dest_buf;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index cb41e6bbbc4d..12cba1a4205b 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -34,29 +34,46 @@
  * 	This routine is marked __always_inline so it can be compiled away
  * 	when CONFIG_DMA_ENGINE=n
  */
-static __always_inline void
-do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
+static __always_inline struct dma_async_tx_descriptor *
+do_async_xor(struct dma_device *device,
 	struct dma_chan *chan, struct page *dest, struct page **src_list,
 	unsigned int offset, unsigned int src_cnt, size_t len,
 	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
 	dma_async_tx_callback cb_fn, void *cb_param)
 {
-	dma_addr_t dma_addr;
+	dma_addr_t dma_dest;
+	dma_addr_t *dma_src = (dma_addr_t *) src_list;
+	struct dma_async_tx_descriptor *tx;
 	int i;
 
 	pr_debug("%s: len: %zu\n", __FUNCTION__, len);
 
-	dma_addr = dma_map_page(device->dev, dest, offset, len,
+	dma_dest = dma_map_page(device->dev, dest, offset, len,
 				DMA_FROM_DEVICE);
-	tx->tx_set_dest(dma_addr, tx, 0);
 
-	for (i = 0; i < src_cnt; i++) {
-		dma_addr = dma_map_page(device->dev, src_list[i],
-			offset, len, DMA_TO_DEVICE);
-		tx->tx_set_src(dma_addr, tx, i);
+	for (i = 0; i < src_cnt; i++)
+		dma_src[i] = dma_map_page(device->dev, src_list[i], offset,
+					  len, DMA_TO_DEVICE);
+
+	/* Since we have clobbered the src_list we are committed
+	 * to doing this asynchronously.  Drivers force forward progress
+	 * in case they can not provide a descriptor
+	 */
+	tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len,
+					 cb_fn != NULL);
+	if (!tx) {
+		if (depend_tx)
+			dma_wait_for_async_tx(depend_tx);
+
+		while (!tx)
+			tx = device->device_prep_dma_xor(chan, dma_dest,
+							 dma_src, src_cnt, len,
+							 cb_fn != NULL);
 	}
 
 	async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+
+	return tx;
 }
 
 static void
@@ -118,7 +135,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	void *_cb_param;
 	unsigned long local_flags;
 	int xor_src_cnt;
-	int i = 0, src_off = 0, int_en;
+	int i = 0, src_off = 0;
 
 	BUG_ON(src_cnt <= 1);
 
@@ -138,20 +155,11 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 				_cb_param = cb_param;
 			}
 
-			int_en = _cb_fn ? 1 : 0;
-
-			tx = device->device_prep_dma_xor(
-				chan, xor_src_cnt, len, int_en);
-
-			if (tx) {
-				do_async_xor(tx, device, chan, dest,
-				&src_list[src_off], offset, xor_src_cnt, len,
-				local_flags, depend_tx, _cb_fn,
-				_cb_param);
-			} else /* fall through */
-				goto xor_sync;
+			tx = do_async_xor(device, chan, dest,
+					  &src_list[src_off], offset,
+					  xor_src_cnt, len, local_flags,
+					  depend_tx, _cb_fn, _cb_param);
 		} else { /* run the xor synchronously */
-xor_sync:
 			/* in the sync case the dest is an implied source
 			 * (assumes the dest is at the src_off index)
 			 */
@@ -254,23 +262,31 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
 {
 	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
 	struct dma_device *device = chan ? chan->device : NULL;
-	int int_en = cb_fn ? 1 : 0;
-	struct dma_async_tx_descriptor *tx = device ?
-		device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
-			int_en) : NULL;
-	int i;
+	struct dma_async_tx_descriptor *tx = NULL;
 
 	BUG_ON(src_cnt <= 1);
 
-	if (tx) {
-		dma_addr_t dma_addr;
+	if (device) {
+		dma_addr_t *dma_src = (dma_addr_t *) src_list;
+		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
 
-		for (i = 0; i < src_cnt; i++) {
-			dma_addr = dma_map_page(device->dev, src_list[i],
-				offset, len, DMA_TO_DEVICE);
-			tx->tx_set_src(dma_addr, tx, i);
+		for (i = 0; i < src_cnt; i++)
+			dma_src[i] = dma_map_page(device->dev, src_list[i],
+						  offset, len, DMA_TO_DEVICE);
+
+		tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
+						      len, result,
+						      cb_fn != NULL);
+		if (!tx) {
+			if (depend_tx)
+				dma_wait_for_async_tx(depend_tx);
+
+			while (!tx)
+				tx = device->device_prep_dma_zero_sum(chan,
+					dma_src, src_cnt, len, result,
+					cb_fn != NULL);
 		}
 
 		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
@@ -305,6 +321,16 @@ EXPORT_SYMBOL_GPL(async_xor_zero_sum);
 
 static int __init async_xor_init(void)
 {
+	#ifdef CONFIG_DMA_ENGINE
+	/* To conserve stack space the input src_list (array of page pointers)
+	 * is reused to hold the array of dma addresses passed to the driver.
+	 * This conversion is only possible when dma_addr_t is less than the
+	 * the size of a pointer.  HIGHMEM64G is known to violate this
+	 * assumption.
+	 */
+	BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
+	#endif
+
 	return 0;
 }
 
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index c46b7c219ee9..a703deffb795 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -5,6 +5,7 @@
 menuconfig DMADEVICES
 	bool "DMA Engine support"
 	depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+	depends on !HIGHMEM64G
 	help
 	  DMA engines can do asynchronous data transfers without
 	  involving the host CPU.  Currently, this framework can be
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index bcf52df30339..29965231b912 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -473,20 +473,22 @@ dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
 {
 	struct dma_device *dev = chan->device;
 	struct dma_async_tx_descriptor *tx;
-	dma_addr_t addr;
+	dma_addr_t dma_dest, dma_src;
 	dma_cookie_t cookie;
 	int cpu;
 
-	tx = dev->device_prep_dma_memcpy(chan, len, 0);
-	if (!tx)
+	dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, 0);
+
+	if (!tx) {
+		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
 		return -ENOMEM;
+	}
 
 	tx->ack = 1;
 	tx->callback = NULL;
-	addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
-	tx->tx_set_src(addr, tx, 0);
-	addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
-	tx->tx_set_dest(addr, tx, 0);
 	cookie = tx->tx_submit(tx);
 
 	cpu = get_cpu();
@@ -517,20 +519,22 @@ dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
 {
 	struct dma_device *dev = chan->device;
 	struct dma_async_tx_descriptor *tx;
-	dma_addr_t addr;
+	dma_addr_t dma_dest, dma_src;
 	dma_cookie_t cookie;
 	int cpu;
 
-	tx = dev->device_prep_dma_memcpy(chan, len, 0);
-	if (!tx)
+	dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, 0);
+
+	if (!tx) {
+		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
 		return -ENOMEM;
+	}
 
 	tx->ack = 1;
 	tx->callback = NULL;
-	addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
-	tx->tx_set_src(addr, tx, 0);
-	addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
-	tx->tx_set_dest(addr, tx, 0);
 	cookie = tx->tx_submit(tx);
 
 	cpu = get_cpu();
@@ -563,20 +567,23 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
 {
 	struct dma_device *dev = chan->device;
 	struct dma_async_tx_descriptor *tx;
-	dma_addr_t addr;
+	dma_addr_t dma_dest, dma_src;
 	dma_cookie_t cookie;
 	int cpu;
 
-	tx = dev->device_prep_dma_memcpy(chan, len, 0);
-	if (!tx)
+	dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
+				DMA_FROM_DEVICE);
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, 0);
+
+	if (!tx) {
+		dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
 		return -ENOMEM;
+	}
 
 	tx->ack = 1;
 	tx->callback = NULL;
-	addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
-	tx->tx_set_src(addr, tx, 0);
-	addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE);
-	tx->tx_set_dest(addr, tx, 0);
 	cookie = tx->tx_submit(tx);
 
 	cpu = get_cpu();
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 45e7b4666c7b..5bcfc55a2776 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -159,20 +159,6 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
 	return device->common.chancnt;
 }
 
-static void ioat_set_src(dma_addr_t addr,
-			 struct dma_async_tx_descriptor *tx,
-			 int index)
-{
-	tx_to_ioat_desc(tx)->src = addr;
-}
-
-static void ioat_set_dest(dma_addr_t addr,
-			  struct dma_async_tx_descriptor *tx,
-			  int index)
-{
-	tx_to_ioat_desc(tx)->dst = addr;
-}
-
 /**
  * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
  *                                 descriptors to hw
@@ -415,8 +401,6 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
 
 	memset(desc, 0, sizeof(*desc));
 	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
-	desc_sw->async_tx.tx_set_src = ioat_set_src;
-	desc_sw->async_tx.tx_set_dest = ioat_set_dest;
 	switch (ioat_chan->device->version) {
 	case IOAT_VER_1_2:
 		desc_sw->async_tx.tx_submit = ioat1_tx_submit;
@@ -714,6 +698,8 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
 
 static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
 						struct dma_chan *chan,
+						dma_addr_t dma_dest,
+						dma_addr_t dma_src,
 						size_t len,
 						int int_en)
 {
@@ -726,6 +712,8 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
 
 	if (new) {
 		new->len = len;
+		new->dst = dma_dest;
+		new->src = dma_src;
 		return &new->async_tx;
 	} else
 		return NULL;
@@ -733,6 +721,8 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
 
 static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
 						struct dma_chan *chan,
+						dma_addr_t dma_dest,
+						dma_addr_t dma_src,
 						size_t len,
 						int int_en)
 {
@@ -749,6 +739,8 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
 
 	if (new) {
 		new->len = len;
+		new->dst = dma_dest;
+		new->src = dma_src;
 		return &new->async_tx;
 	} else
 		return NULL;
@@ -1045,7 +1037,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
 	u8 *dest;
 	struct dma_chan *dma_chan;
 	struct dma_async_tx_descriptor *tx;
-	dma_addr_t addr;
+	dma_addr_t dma_dest, dma_src;
 	dma_cookie_t cookie;
 	int err = 0;
 
@@ -1073,7 +1065,12 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
 		goto out;
 	}
 
-	tx = device->common.device_prep_dma_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
+	dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
+				 DMA_TO_DEVICE);
+	dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
+				  DMA_FROM_DEVICE);
+	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+						   IOAT_TEST_SIZE, 0);
 	if (!tx) {
 		dev_err(&device->pdev->dev,
 			"Self-test prep failed, disabling\n");
@@ -1082,12 +1079,6 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
 	}
 
 	async_tx_ack(tx);
-	addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
-			      DMA_TO_DEVICE);
-	tx->tx_set_src(addr, tx, 0);
-	addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
-			      DMA_FROM_DEVICE);
-	tx->tx_set_dest(addr, tx, 0);
 	tx->callback = ioat_dma_test_callback;
 	tx->callback_param = (void *)0x8086;
 	cookie = tx->tx_submit(tx);
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index b011b5ae22a2..eda841c60690 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -443,17 +443,6 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 	return cookie;
 }
 
-static void
-iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
-	int index)
-{
-	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
-
-	/* to do: support transfers lengths > IOP_ADMA_MAX_BYTE_COUNT */
-	iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr);
-}
-
 static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
 static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
 
@@ -486,7 +475,6 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
 
 		dma_async_tx_descriptor_init(&slot->async_tx, chan);
 		slot->async_tx.tx_submit = iop_adma_tx_submit;
-		slot->async_tx.tx_set_dest = iop_adma_set_dest;
 		INIT_LIST_HEAD(&slot->chain_node);
 		INIT_LIST_HEAD(&slot->slot_node);
 		INIT_LIST_HEAD(&slot->async_tx.tx_list);
@@ -547,18 +535,9 @@ iop_adma_prep_dma_interrupt(struct dma_chan *chan)
 	return sw_desc ? &sw_desc->async_tx : NULL;
 }
 
-static void
-iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
-	int index)
-{
-	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
-	struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
-
-	iop_desc_set_memcpy_src_addr(grp_start, addr);
-}
-
 static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
+iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+			 dma_addr_t dma_src, size_t len, int int_en)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -578,9 +557,10 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
 		grp_start = sw_desc->group_head;
 		iop_desc_init_memcpy(grp_start, int_en);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
 		sw_desc->unmap_src_cnt = 1;
 		sw_desc->unmap_len = len;
-		sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src;
 	}
 	spin_unlock_bh(&iop_chan->lock);
 
@@ -588,8 +568,8 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
 }
 
 static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len,
-	int int_en)
+iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
+			 int value, size_t len, int int_en)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -610,6 +590,7 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len,
 		iop_desc_init_memset(grp_start, int_en);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_block_fill_val(grp_start, value);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		sw_desc->unmap_src_cnt = 1;
 		sw_desc->unmap_len = len;
 	}
@@ -618,19 +599,10 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len,
 	return sw_desc ? &sw_desc->async_tx : NULL;
 }
 
-static void
-iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
-	int index)
-{
-	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
-	struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
-
-	iop_desc_set_xor_src_addr(grp_start, index, addr);
-}
-
 static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len,
-	int int_en)
+iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
+		      dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
+		      int int_en)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -651,29 +623,22 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len,
 		grp_start = sw_desc->group_head;
 		iop_desc_init_xor(grp_start, src_cnt, int_en);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		sw_desc->unmap_src_cnt = src_cnt;
 		sw_desc->unmap_len = len;
-		sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src;
+		while (src_cnt--)
+			iop_desc_set_xor_src_addr(grp_start, src_cnt,
+						  dma_src[src_cnt]);
 	}
 	spin_unlock_bh(&iop_chan->lock);
 
 	return sw_desc ? &sw_desc->async_tx : NULL;
 }
 
-static void
-iop_adma_xor_zero_sum_set_src(dma_addr_t addr,
-				struct dma_async_tx_descriptor *tx,
-				int index)
-{
-	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
-	struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
-
-	iop_desc_set_zero_sum_src_addr(grp_start, index, addr);
-}
-
 static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt,
-	size_t len, u32 *result, int int_en)
+iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
+			   unsigned int src_cnt, size_t len, u32 *result,
+			   int int_en)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -697,7 +662,9 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt,
 			__FUNCTION__, grp_start->xor_check_result);
 		sw_desc->unmap_src_cnt = src_cnt;
 		sw_desc->unmap_len = len;
-		sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src;
+		while (src_cnt--)
+			iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
+						       dma_src[src_cnt]);
 	}
 	spin_unlock_bh(&iop_chan->lock);
 
@@ -882,13 +849,12 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
 		goto out;
 	}
 
-	tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1);
 	dest_dma = dma_map_single(dma_chan->device->dev, dest,
 				IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
-	iop_adma_set_dest(dest_dma, tx, 0);
 	src_dma = dma_map_single(dma_chan->device->dev, src,
 				IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
-	iop_adma_memcpy_set_src(src_dma, tx, 0);
+	tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				      IOP_ADMA_TEST_SIZE, 1);
 
 	cookie = iop_adma_tx_submit(tx);
 	iop_adma_issue_pending(dma_chan);
@@ -929,6 +895,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
 	struct page *dest;
 	struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
 	struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+	dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
 	dma_addr_t dma_addr, dest_dma;
 	struct dma_async_tx_descriptor *tx;
 	struct dma_chan *dma_chan;
@@ -981,17 +948,13 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
 	}
 
 	/* test xor */
-	tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST,
-				PAGE_SIZE, 1);
 	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
 				PAGE_SIZE, DMA_FROM_DEVICE);
-	iop_adma_set_dest(dest_dma, tx, 0);
-
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
-		dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0,
-			PAGE_SIZE, DMA_TO_DEVICE);
-		iop_adma_xor_set_src(dma_addr, tx, i);
-	}
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					   0, PAGE_SIZE, DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				   IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE, 1);
 
 	cookie = iop_adma_tx_submit(tx);
 	iop_adma_issue_pending(dma_chan);
@@ -1032,13 +995,13 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
 
 	zero_sum_result = 1;
 
-	tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
-		PAGE_SIZE, &zero_sum_result, 1);
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
-		dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
-			0, PAGE_SIZE, DMA_TO_DEVICE);
-		iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
-	}
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
+					IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+					&zero_sum_result, 1);
 
 	cookie = iop_adma_tx_submit(tx);
 	iop_adma_issue_pending(dma_chan);
@@ -1060,10 +1023,9 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
 	}
 
 	/* test memset */
-	tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1);
 	dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
 			PAGE_SIZE, DMA_FROM_DEVICE);
-	iop_adma_set_dest(dma_addr, tx, 0);
+	tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, 1);
 
 	cookie = iop_adma_tx_submit(tx);
 	iop_adma_issue_pending(dma_chan);
@@ -1089,13 +1051,13 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
 
 	/* test for non-zero parity sum */
 	zero_sum_result = 0;
-	tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
-		PAGE_SIZE, &zero_sum_result, 1);
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
-		dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
-			0, PAGE_SIZE, DMA_TO_DEVICE);
-		iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
-	}
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
+					IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+					&zero_sum_result, 1);
 
 	cookie = iop_adma_tx_submit(tx);
 	iop_adma_issue_pending(dma_chan);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 5c84bf897593..b0864f5b729d 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -209,8 +209,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
  *	descriptors
  * @chan: target channel for this operation
  * @tx_submit: set the prepared descriptor(s) to be executed by the engine
- * @tx_set_dest: set a destination address in a hardware descriptor
- * @tx_set_src: set a source address in a hardware descriptor
  * @callback: routine to call after this operation is complete
  * @callback_param: general parameter to pass to the callback routine
  * ---async_tx api specific fields---
@@ -227,10 +225,6 @@ struct dma_async_tx_descriptor {
 	struct list_head tx_list;
 	struct dma_chan *chan;
 	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
-	void (*tx_set_dest)(dma_addr_t addr,
-		struct dma_async_tx_descriptor *tx, int index);
-	void (*tx_set_src)(dma_addr_t addr,
-		struct dma_async_tx_descriptor *tx, int index);
 	dma_async_tx_callback callback;
 	void *callback_param;
 	struct list_head depend_list;
@@ -279,15 +273,17 @@ struct dma_device {
 	void (*device_free_chan_resources)(struct dma_chan *chan);
 
 	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
-		struct dma_chan *chan, size_t len, int int_en);
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, int int_en);
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
-		struct dma_chan *chan, unsigned int src_cnt, size_t len,
-		int int_en);
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		unsigned int src_cnt, size_t len, int int_en);
 	struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
-		struct dma_chan *chan, unsigned int src_cnt, size_t len,
-		u32 *result, int int_en);
+		struct dma_chan *chan, dma_addr_t *src,	unsigned int src_cnt,
+		size_t len, u32 *result, int int_en);
 	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
-		struct dma_chan *chan, int value, size_t len, int int_en);
+		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
+		int int_en);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan);
 
-- 
cgit 


From d4c56f97ff21df405d0cebe11f49e3c3c79662b5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 2 Feb 2008 19:49:58 -0700
Subject: async_tx: replace 'int_en' with operation preparation flags

Pass a full set of flags to drivers' per-operation 'prep' routines.
Currently the only flag passed is DMA_PREP_INTERRUPT.  The expectation is
that arch-specific async_tx_find_channel() implementations can exploit this
capability to find the best channel for an operation.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@intel.com>
Reviewed-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
---
 crypto/async_tx/async_memcpy.c         |  3 ++-
 crypto/async_tx/async_memset.c         |  3 ++-
 crypto/async_tx/async_xor.c            | 10 ++++++----
 drivers/dma/ioat_dma.c                 |  4 ++--
 drivers/dma/iop-adma.c                 | 20 ++++++++++----------
 include/asm-arm/arch-iop13xx/adma.h    | 18 ++++++++++--------
 include/asm-arm/hardware/iop3xx-adma.h | 30 +++++++++++++++++-------------
 include/linux/dmaengine.h              | 17 +++++++++++++----
 8 files changed, 62 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index faca0bc52068..25dcf33bbc2d 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -52,6 +52,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
 	if (device) {
 		dma_addr_t dma_dest, dma_src;
+		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 
 		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
 					DMA_FROM_DEVICE);
@@ -60,7 +61,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 				       DMA_TO_DEVICE);
 
 		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
-						    len, cb_fn != NULL);
+						    len, dma_prep_flags);
 	}
 
 	if (tx) {
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 0c94851cfd37..8e98ab0cd37c 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -52,12 +52,13 @@ async_memset(struct page *dest, int val, unsigned int offset,
 
 	if (device) {
 		dma_addr_t dma_dest;
+		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 
 		dma_dest = dma_map_page(device->dev, dest, offset, len,
 					DMA_FROM_DEVICE);
 
 		tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
-						    cb_fn != NULL);
+						    dma_prep_flags);
 	}
 
 	if (tx) {
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 12cba1a4205b..68d2fe4465d8 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -45,6 +45,7 @@ do_async_xor(struct dma_device *device,
 	dma_addr_t *dma_src = (dma_addr_t *) src_list;
 	struct dma_async_tx_descriptor *tx;
 	int i;
+	unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 
 	pr_debug("%s: len: %zu\n", __FUNCTION__, len);
 
@@ -60,7 +61,7 @@ do_async_xor(struct dma_device *device,
 	 * in case they can not provide a descriptor
 	 */
 	tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len,
-					 cb_fn != NULL);
+					 dma_prep_flags);
 	if (!tx) {
 		if (depend_tx)
 			dma_wait_for_async_tx(depend_tx);
@@ -68,7 +69,7 @@ do_async_xor(struct dma_device *device,
 		while (!tx)
 			tx = device->device_prep_dma_xor(chan, dma_dest,
 							 dma_src, src_cnt, len,
-							 cb_fn != NULL);
+							 dma_prep_flags);
 	}
 
 	async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
@@ -268,6 +269,7 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
 
 	if (device) {
 		dma_addr_t *dma_src = (dma_addr_t *) src_list;
+		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
@@ -278,7 +280,7 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
 
 		tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
 						      len, result,
-						      cb_fn != NULL);
+						      dma_prep_flags);
 		if (!tx) {
 			if (depend_tx)
 				dma_wait_for_async_tx(depend_tx);
@@ -286,7 +288,7 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
 			while (!tx)
 				tx = device->device_prep_dma_zero_sum(chan,
 					dma_src, src_cnt, len, result,
-					cb_fn != NULL);
+					dma_prep_flags);
 		}
 
 		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 5bcfc55a2776..dff38accc5c1 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -701,7 +701,7 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
 						dma_addr_t dma_dest,
 						dma_addr_t dma_src,
 						size_t len,
-						int int_en)
+						unsigned long flags)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 	struct ioat_desc_sw *new;
@@ -724,7 +724,7 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
 						dma_addr_t dma_dest,
 						dma_addr_t dma_src,
 						size_t len,
-						int int_en)
+						unsigned long flags)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 	struct ioat_desc_sw *new;
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index eda841c60690..3986d54492bd 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -537,7 +537,7 @@ iop_adma_prep_dma_interrupt(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
-			 dma_addr_t dma_src, size_t len, int int_en)
+			 dma_addr_t dma_src, size_t len, unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -555,7 +555,7 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_memcpy(grp_start, int_en);
+		iop_desc_init_memcpy(grp_start, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
@@ -569,7 +569,7 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
 
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
-			 int value, size_t len, int int_en)
+			 int value, size_t len, unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -587,7 +587,7 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_memset(grp_start, int_en);
+		iop_desc_init_memset(grp_start, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_block_fill_val(grp_start, value);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
@@ -602,7 +602,7 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
 		      dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
-		      int int_en)
+		      unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -613,15 +613,15 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
 	BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
 
 	dev_dbg(iop_chan->device->common.dev,
-		"%s src_cnt: %d len: %u int_en: %d\n",
-		__FUNCTION__, src_cnt, len, int_en);
+		"%s src_cnt: %d len: %u flags: %lx\n",
+		__FUNCTION__, src_cnt, len, flags);
 
 	spin_lock_bh(&iop_chan->lock);
 	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_xor(grp_start, src_cnt, int_en);
+		iop_desc_init_xor(grp_start, src_cnt, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		sw_desc->unmap_src_cnt = src_cnt;
@@ -638,7 +638,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
 			   unsigned int src_cnt, size_t len, u32 *result,
-			   int int_en)
+			   unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -655,7 +655,7 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_zero_sum(grp_start, src_cnt, int_en);
+		iop_desc_init_zero_sum(grp_start, src_cnt, flags);
 		iop_desc_set_zero_sum_byte_count(grp_start, len);
 		grp_start->xor_check_result = result;
 		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
diff --git a/include/asm-arm/arch-iop13xx/adma.h b/include/asm-arm/arch-iop13xx/adma.h
index 04006c1c5fd7..efd9a5eb1008 100644
--- a/include/asm-arm/arch-iop13xx/adma.h
+++ b/include/asm-arm/arch-iop13xx/adma.h
@@ -247,7 +247,7 @@ static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc,
 }
 
 static inline void
-iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -257,13 +257,13 @@ iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
 
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 }
 
 static inline void
-iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -274,14 +274,15 @@ iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
 	u_desc_ctrl.field.block_fill_en = 1;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 }
 
 /* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
 static inline void
-iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		  unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -292,7 +293,7 @@ iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.src_select = src_cnt - 1;
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 
@@ -301,7 +302,8 @@ iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
 
 /* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
 static inline int
-iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -314,7 +316,7 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
 	u_desc_ctrl.field.zero_result = 1;
 	u_desc_ctrl.field.status_write_back_en = 1;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 
diff --git a/include/asm-arm/hardware/iop3xx-adma.h b/include/asm-arm/hardware/iop3xx-adma.h
index 10834b54f681..5c529e6a5e3b 100644
--- a/include/asm-arm/hardware/iop3xx-adma.h
+++ b/include/asm-arm/hardware/iop3xx-adma.h
@@ -414,7 +414,7 @@ static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
 }
 
 static inline void
-iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
 	union {
@@ -425,14 +425,14 @@ iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.mem_to_mem_en = 1;
 	u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->upper_pci_src_addr = 0;
 	hw_desc->crc_addr = 0;
 }
 
 static inline void
-iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
 	union {
@@ -443,12 +443,13 @@ iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
 	u_desc_ctrl.field.dest_write_en = 1;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 }
 
 static inline u32
-iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en)
+iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt,
+		     unsigned long flags)
 {
 	int i, shift;
 	u32 edcr;
@@ -509,21 +510,23 @@ iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en)
 
 	u_desc_ctrl.field.dest_write_en = 1;
 	u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 
 	return u_desc_ctrl.value;
 }
 
 static inline void
-iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		  unsigned long flags)
 {
-	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en);
+	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags);
 }
 
 /* return the number of operations */
 static inline int
-iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
 {
 	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
 	struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
@@ -538,10 +541,10 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
 	for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
 		i += slots_per_op, j++) {
 		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en);
+		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags);
 		u_desc_ctrl.field.dest_write_en = 0;
 		u_desc_ctrl.field.zero_result_en = 1;
-		u_desc_ctrl.field.int_en = int_en;
+		u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 		iter->desc_ctrl = u_desc_ctrl.value;
 
 		/* for the subsequent descriptors preserve the store queue
@@ -559,7 +562,8 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
 }
 
 static inline void
-iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
 {
 	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
 	union {
@@ -591,7 +595,7 @@ iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
 	}
 
 	u_desc_ctrl.field.dest_write_en = 0;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 }
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index b0864f5b729d..acbb364674ff 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -94,6 +94,15 @@ enum dma_transaction_type {
 /* last transaction type for creation of the capabilities mask */
 #define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
 
+/**
+ * enum dma_prep_flags - DMA flags to augment operation preparation
+ * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
+ * 	this transaction
+ */
+enum dma_prep_flags {
+	DMA_PREP_INTERRUPT = (1 << 0),
+};
+
 /**
  * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
  * See linux/cpumask.h
@@ -274,16 +283,16 @@ struct dma_device {
 
 	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
 		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
-		size_t len, int int_en);
+		size_t len, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
 		struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
-		unsigned int src_cnt, size_t len, int int_en);
+		unsigned int src_cnt, size_t len, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
 		struct dma_chan *chan, dma_addr_t *src,	unsigned int src_cnt,
-		size_t len, u32 *result, int int_en);
+		size_t len, u32 *result, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
-		int int_en);
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan);
 
-- 
cgit 


From 47437b2c9a64315efeb3d84e97ffefd6c3c67ef1 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 2 Feb 2008 19:49:59 -0700
Subject: async_tx: allow architecture specific async_tx_find_channel
 implementations

The source and destination addresses are included to allow channel
selection based on address alignment.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
---
 crypto/async_tx/async_memcpy.c |  3 ++-
 crypto/async_tx/async_memset.c |  3 ++-
 crypto/async_tx/async_tx.c     |  6 +++---
 crypto/async_tx/async_xor.c    |  8 ++++++--
 include/linux/async_tx.h       | 11 +++++++++--
 5 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 25dcf33bbc2d..0f6282207b32 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -46,7 +46,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 	struct dma_async_tx_descriptor *depend_tx,
 	dma_async_tx_callback cb_fn, void *cb_param)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
+	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+						      &dest, 1, &src, 1, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 8e98ab0cd37c..09c0e83664bc 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -46,7 +46,8 @@ async_memset(struct page *dest, int val, unsigned int offset,
 	struct dma_async_tx_descriptor *depend_tx,
 	dma_async_tx_callback cb_fn, void *cb_param)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
+	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
+						      &dest, 1, NULL, 0, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index f39777f30f60..562882189de5 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -361,13 +361,13 @@ static void __exit async_tx_exit(void)
 }
 
 /**
- * async_tx_find_channel - find a channel to carry out the operation or let
+ * __async_tx_find_channel - find a channel to carry out the operation or let
  *	the transaction execute synchronously
  * @depend_tx: transaction dependency
  * @tx_type: transaction type
  */
 struct dma_chan *
-async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
+__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
 	enum dma_transaction_type tx_type)
 {
 	/* see if we can keep the chain on one channel */
@@ -383,7 +383,7 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
 	} else
 		return NULL;
 }
-EXPORT_SYMBOL_GPL(async_tx_find_channel);
+EXPORT_SYMBOL_GPL(__async_tx_find_channel);
 #else
 static int __init async_tx_init(void)
 {
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 68d2fe4465d8..2259a4ff15cb 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -129,7 +129,9 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	struct dma_async_tx_descriptor *depend_tx,
 	dma_async_tx_callback cb_fn, void *cb_param)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
+	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
+						      &dest, 1, src_list,
+						      src_cnt, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 	dma_async_tx_callback _cb_fn;
@@ -261,7 +263,9 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
 	struct dma_async_tx_descriptor *depend_tx,
 	dma_async_tx_callback cb_fn, void *cb_param)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
+	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
+						      &dest, 1, src_list,
+						      src_cnt, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 3d59d371dd32..eb640f0acfac 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -62,9 +62,15 @@ enum async_tx_flags {
 void async_tx_issue_pending_all(void);
 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
 void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
+#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+#include <asm/async_tx.h>
+#else
+#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
+	 __async_tx_find_channel(dep, type)
 struct dma_chan *
-async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
+__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
 	enum dma_transaction_type tx_type);
+#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
 #else
 static inline void async_tx_issue_pending_all(void)
 {
@@ -86,7 +92,8 @@ async_tx_run_dependencies(struct dma_async_tx_descriptor *tx,
 
 static inline struct dma_chan *
 async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-	enum dma_transaction_type tx_type)
+	enum dma_transaction_type tx_type, struct page **dst, int dst_count,
+	struct page **src, int src_count, size_t len)
 {
 	return NULL;
 }
-- 
cgit 


From 4e701482d1d7b90c358e2bd244bb71623f767120 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Wed, 6 Feb 2008 01:36:14 -0800
Subject: hash: add explicit u32 and u64 versions of hash

The 32-bit version is more efficient (and apparently gives better hash
results than the 64-bit version), so users who are only hashing a 32-bit
quantity can now opt to use the 32-bit version explicitly, rather than
promoting to a long.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Cc: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hash.h | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hash.h b/include/linux/hash.h
index acf17bb8e7f9..06d25c189cc5 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -1,6 +1,6 @@
 #ifndef _LINUX_HASH_H
 #define _LINUX_HASH_H
-/* Fast hashing routine for a long.
+/* Fast hashing routine for ints,  longs and pointers.
    (C) 2002 William Lee Irwin III, IBM */
 
 /*
@@ -13,23 +13,30 @@
  * them can use shifts and additions instead of multiplications for
  * machines where multiplications are slow.
  */
-#if BITS_PER_LONG == 32
+
+#include <asm/types.h>
+
 /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define GOLDEN_RATIO_PRIME 0x9e370001UL
-#elif BITS_PER_LONG == 64
+#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
 /*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL
+#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
+
+#if BITS_PER_LONG == 32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32
+#define hash_long(val, bits) hash_32(val, bits)
+#elif BITS_PER_LONG == 64
+#define hash_long(val, bits) hash_64(val, bits)
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64
 #else
-#error Define GOLDEN_RATIO_PRIME for your wordsize.
+#error Wordsize not 32 or 64
 #endif
 
-static inline unsigned long hash_long(unsigned long val, unsigned int bits)
+static inline u64 hash_64(u64 val, unsigned int bits)
 {
-	unsigned long hash = val;
+	u64 hash = val;
 
-#if BITS_PER_LONG == 64
 	/*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
-	unsigned long n = hash;
+	u64 n = hash;
 	n <<= 18;
 	hash -= n;
 	n <<= 33;
@@ -42,15 +49,20 @@ static inline unsigned long hash_long(unsigned long val, unsigned int bits)
 	hash += n;
 	n <<= 2;
 	hash += n;
-#else
+
+	/* High bits are more random, so use them. */
+	return hash >> (64 - bits);
+}
+
+static inline u32 hash_32(u32 val, unsigned int bits)
+{
 	/* On some cpus multiply is faster, on others gcc will do shifts */
-	hash *= GOLDEN_RATIO_PRIME;
-#endif
+	u32 hash = val * GOLDEN_RATIO_PRIME_32;
 
 	/* High bits are more random, so use them. */
-	return hash >> (BITS_PER_LONG - bits);
+	return hash >> (32 - bits);
 }
-	
+
 static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
 {
 	return hash_long((unsigned long)ptr, bits);
-- 
cgit 


From 0a5dcb51770be3cd0202d6b90a07996fb40130b6 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Date: Wed, 6 Feb 2008 01:36:25 -0800
Subject: Parallel port: convert port_mutex to the mutex API

Parallel port: Convert port_mutex to the mutex API

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/lp.c  | 10 +++++-----
 include/linux/lp.h |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 81674d7c56c7..60ac642752be 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -312,7 +312,7 @@ static ssize_t lp_write(struct file * file, const char __user * buf,
 	if (copy_size > LP_BUFFER_SIZE)
 		copy_size = LP_BUFFER_SIZE;
 
-	if (down_interruptible (&lp_table[minor].port_mutex))
+	if (mutex_lock_interruptible(&lp_table[minor].port_mutex))
 		return -EINTR;
 
 	if (copy_from_user (kbuf, buf, copy_size)) {
@@ -399,7 +399,7 @@ static ssize_t lp_write(struct file * file, const char __user * buf,
 		lp_release_parport (&lp_table[minor]);
 	}
 out_unlock:
-	up (&lp_table[minor].port_mutex);
+	mutex_unlock(&lp_table[minor].port_mutex);
 
  	return retv;
 }
@@ -421,7 +421,7 @@ static ssize_t lp_read(struct file * file, char __user * buf,
 	if (count > LP_BUFFER_SIZE)
 		count = LP_BUFFER_SIZE;
 
-	if (down_interruptible (&lp_table[minor].port_mutex))
+	if (mutex_lock_interruptible(&lp_table[minor].port_mutex))
 		return -EINTR;
 
 	lp_claim_parport_or_block (&lp_table[minor]);
@@ -479,7 +479,7 @@ static ssize_t lp_read(struct file * file, char __user * buf,
 	if (retval > 0 && copy_to_user (buf, kbuf, retval))
 		retval = -EFAULT;
 
-	up (&lp_table[minor].port_mutex);
+	mutex_unlock(&lp_table[minor].port_mutex);
 
 	return retval;
 }
@@ -888,7 +888,7 @@ static int __init lp_init (void)
 		lp_table[i].last_error = 0;
 		init_waitqueue_head (&lp_table[i].waitq);
 		init_waitqueue_head (&lp_table[i].dataq);
-		init_MUTEX (&lp_table[i].port_mutex);
+		mutex_init(&lp_table[i].port_mutex);
 		lp_table[i].timeout = 10 * HZ;
 	}
 
diff --git a/include/linux/lp.h b/include/linux/lp.h
index 7059b6b9878a..0df024bfd6f0 100644
--- a/include/linux/lp.h
+++ b/include/linux/lp.h
@@ -99,7 +99,7 @@
 #ifdef __KERNEL__
 
 #include <linux/wait.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
 
 /* Magic numbers for defining port-device mappings */
 #define LP_PARPORT_UNSPEC -4
@@ -145,7 +145,7 @@ struct lp_struct {
 #endif
 	wait_queue_head_t waitq;
 	unsigned int last_error;
-	struct semaphore port_mutex;
+	struct mutex port_mutex;
 	wait_queue_head_t dataq;
 	long timeout;
 	unsigned int best_mode;
-- 
cgit 


From 96c5865559cee0f9cbc5173f3c949f6ce3525581 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Wed, 6 Feb 2008 01:36:27 -0800
Subject: Allow auto-destruction of loop devices

This allows a flag to be set on loop devices so that when they are
closed for the last time, they'll self-destruct.

In general, so that we can automatically allocate loop devices (as with
losetup -f) and have them disappear when we're done with them.

In particular, right now, so that we can stop relying on the hackish
special-case in umount(8) which kills off loop devices which were set up by
'mount -oloop'.  That means we can stop putting crap in /etc/mtab which
doesn't belong there, which means it can be a symlink to /proc/mounts, which
means yet another writable file on the root filesystem is eliminated and the
'stateless' folks get happier...  and OLPC trac #356 can be closed.

The mount(8) side of that is at
http://marc.info/?l=util-linux-ng&m=119362955431694&w=2

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Cc: Bernardo Innocenti <bernie@codewiz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/loop.c | 8 ++++++++
 include/linux/loop.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b8af22e610df..91ebb007416c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -973,6 +973,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	lo->transfer = xfer->transfer;
 	lo->ioctl = xfer->ioctl;
 
+	if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
+	     (info->lo_flags & LO_FLAGS_AUTOCLEAR))
+		lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
+
 	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
 	lo->lo_init[0] = info->lo_init[0];
 	lo->lo_init[1] = info->lo_init[1];
@@ -1331,6 +1335,10 @@ static int lo_release(struct inode *inode, struct file *file)
 
 	mutex_lock(&lo->lo_ctl_mutex);
 	--lo->lo_refcnt;
+
+	if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) && !lo->lo_refcnt)
+		loop_clr_fd(lo, inode->i_bdev);
+
 	mutex_unlock(&lo->lo_ctl_mutex);
 
 	return 0;
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 26a0a103898f..46169a7b559b 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -76,6 +76,7 @@ struct loop_device {
 enum {
 	LO_FLAGS_READ_ONLY	= 1,
 	LO_FLAGS_USE_AOPS	= 2,
+	LO_FLAGS_AUTOCLEAR	= 4,
 };
 
 #include <asm/posix_types.h>	/* for __kernel_old_dev_t */
-- 
cgit 


From f74596d07957235ad9da5120029348b372224a27 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 6 Feb 2008 01:36:35 -0800
Subject: proper show_interrupts() prototype

Add a proper prototype for show_interrupts() in include/linux/interrupt.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/h8300/kernel/irq.c   | 1 +
 fs/proc/proc_misc.c       | 2 +-
 include/linux/interrupt.h | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
index 8dec4dd57b4e..5a1b4cfea05b 100644
--- a/arch/h8300/kernel/irq.c
+++ b/arch/h8300/kernel/irq.c
@@ -14,6 +14,7 @@
 #include <linux/random.h>
 #include <linux/bootmem.h>
 #include <linux/irq.h>
+#include <linux/interrupt.h>
 
 #include <asm/system.h>
 #include <asm/traps.h>
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 51288db37a0c..c0a90954016f 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/pagemap.h>
+#include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -599,7 +600,6 @@ static void int_seq_stop(struct seq_file *f, void *v)
 }
 
 
-extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */
 static struct seq_operations int_seq_ops = {
 	.start = int_seq_start,
 	.next  = int_seq_next,
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c3db4a00f1fa..dea7598aeff4 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -444,4 +444,6 @@ static inline void init_irq_proc(void)
 }
 #endif
 
+int show_interrupts(struct seq_file *p, void *v);
+
 #endif
-- 
cgit 


From 83bad1d764b836a482b88e0a1f44d7a5c3e1fee0 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 6 Feb 2008 01:36:36 -0800
Subject: scheduled OSS driver removal

This patch contains the scheduled removal of OSS drivers whose config
options have been removed in 2.6.23.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/feature-removal-schedule.txt |    7 -
 arch/ppc/platforms/prep_setup.c            |   81 -
 drivers/media/video/Makefile               |    1 -
 drivers/media/video/tvmixer.c              |  336 ---
 include/asm-powerpc/dma.h                  |   39 +-
 include/linux/ac97_codec.h                 |    7 -
 sound/oss/Makefile                         |    9 -
 sound/oss/ac97_codec.c                     |  284 ---
 sound/oss/btaudio.c                        | 1139 ---------
 sound/oss/cs4232.c                         |  526 ----
 sound/oss/i810_audio.c                     | 3656 ----------------------------
 sound/oss/via82cxxx_audio.c                | 3616 ---------------------------
 12 files changed, 4 insertions(+), 9697 deletions(-)
 delete mode 100644 drivers/media/video/tvmixer.c
 delete mode 100644 sound/oss/btaudio.c
 delete mode 100644 sound/oss/cs4232.c
 delete mode 100644 sound/oss/i810_audio.c
 delete mode 100644 sound/oss/via82cxxx_audio.c

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index a7d9d179131a..68ce1300a360 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -208,13 +208,6 @@ Who:	Randy Dunlap <randy.dunlap@oracle.com>
 
 ---------------------------
 
-What:  drivers depending on OSS_OBSOLETE
-When:  options in 2.6.23, code in 2.6.25
-Why:   obsolete OSS drivers
-Who:   Adrian Bunk <bunk@stusta.de>
-
----------------------------
-
 What: libata spindown skipping and warning
 When: Dec 2008
 Why:  Some halt(8) implementations synchronize caches for and spin
diff --git a/arch/ppc/platforms/prep_setup.c b/arch/ppc/platforms/prep_setup.c
index 3c56654bfc6f..38449855d5ff 100644
--- a/arch/ppc/platforms/prep_setup.c
+++ b/arch/ppc/platforms/prep_setup.c
@@ -91,20 +91,11 @@ extern void prep_tiger1_setup_pci(char *irq_edge_mask_lo, char *irq_edge_mask_hi
 #define cached_21	(((char *)(ppc_cached_irq_mask))[3])
 #define cached_A1	(((char *)(ppc_cached_irq_mask))[2])
 
-#ifdef CONFIG_SOUND_CS4232
-long ppc_cs4232_dma, ppc_cs4232_dma2;
-#endif
-
 extern PTE *Hash, *Hash_end;
 extern unsigned long Hash_size, Hash_mask;
 extern int probingmem;
 extern unsigned long loops_per_jiffy;
 
-#ifdef CONFIG_SOUND_CS4232
-EXPORT_SYMBOL(ppc_cs4232_dma);
-EXPORT_SYMBOL(ppc_cs4232_dma2);
-#endif
-
 /* useful ISA ports */
 #define PREP_SYSCTL	0x81c
 /* present in the IBM reference design; possibly identical in Mot boxes: */
@@ -569,74 +560,6 @@ prep_show_percpuinfo(struct seq_file *m, int i)
 	return 0;
 }
 
-#ifdef CONFIG_SOUND_CS4232
-static long __init masktoint(unsigned int i)
-{
-	int t = -1;
-	while (i >> ++t)
-		;
-	return (t-1);
-}
-
-/*
- * ppc_cs4232_dma and ppc_cs4232_dma2 are used in include/asm/dma.h
- * to distinguish sound dma-channels from others. This is because
- * blocksize on 16 bit dma-channels 5,6,7 is 128k, but
- * the cs4232.c uses 64k like on 8 bit dma-channels 0,1,2,3
- */
-
-static void __init prep_init_sound(void)
-{
-	PPC_DEVICE *audiodevice = NULL;
-
-	/*
-	 * Get the needed resource information from residual data.
-	 *
-	 */
-	if (have_residual_data)
-		audiodevice = residual_find_device(~0, NULL,
-				MultimediaController, AudioController, -1, 0);
-
-	if (audiodevice != NULL) {
-		PnP_TAG_PACKET *pkt;
-
-		pkt = PnP_find_packet((unsigned char *)&res->DevicePnPHeap[audiodevice->AllocatedOffset],
-				S5_Packet, 0);
-		if (pkt != NULL)
-			ppc_cs4232_dma = masktoint(pkt->S5_Pack.DMAMask);
-		pkt = PnP_find_packet((unsigned char*)&res->DevicePnPHeap[audiodevice->AllocatedOffset],
-				S5_Packet, 1);
-		if (pkt != NULL)
-			ppc_cs4232_dma2 = masktoint(pkt->S5_Pack.DMAMask);
-	}
-
-	/*
-	 * These are the PReP specs' defaults for the cs4231.  We use these
-	 * as fallback incase we don't have residual data.
-	 * At least the IBM Thinkpad 850 with IDE DMA Channels at 6 and 7
-	 * will use the other values.
-	 */
-	if (audiodevice == NULL) {
-		switch (_prep_type) {
-		case _PREP_IBM:
-			ppc_cs4232_dma = 1;
-			ppc_cs4232_dma2 = -1;
-			break;
-		default:
-			ppc_cs4232_dma = 6;
-			ppc_cs4232_dma2 = 7;
-		}
-	}
-
-	/*
-	 * Find a way to push this information to the cs4232 driver
-	 * Give it out with printk, when not in cmd_line?
-	 * Append it to cmd_line and boot_command_line?
-	 * Format is cs4232=io,irq,dma,dma2
-	 */
-}
-#endif /* CONFIG_SOUND_CS4232 */
-
 /*
  * Fill out screen_info according to the residual data. This allows us to use
  * at least vesafb.
@@ -898,10 +821,6 @@ prep_setup_arch(void)
 		}
 	}
 
-#ifdef CONFIG_SOUND_CS4232
-	prep_init_sound();
-#endif /* CONFIG_SOUND_CS4232 */
-
 	prep_init_vesa();
 
 	switch (_prep_type) {
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 28ddd146c1c5..850b8c6f4577 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_VIDEO_IR_I2C)  += ir-kbd-i2c.o
 obj-$(CONFIG_VIDEO_TVAUDIO) += tvaudio.o
 obj-$(CONFIG_VIDEO_TDA7432) += tda7432.o
 obj-$(CONFIG_VIDEO_TDA9875) += tda9875.o
-obj-$(CONFIG_SOUND_TVMIXER) += tvmixer.o
 
 obj-$(CONFIG_VIDEO_SAA6588) += saa6588.o
 obj-$(CONFIG_VIDEO_SAA5246A) += saa5246a.o
diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c
deleted file mode 100644
index 9fa5b702e073..000000000000
--- a/drivers/media/video/tvmixer.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/timer.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/videodev.h>
-#include <linux/init.h>
-#include <linux/kdev_t.h>
-#include <linux/sound.h>
-#include <linux/soundcard.h>
-
-#include <asm/semaphore.h>
-#include <asm/uaccess.h>
-
-
-#define DEV_MAX  4
-
-static int devnr = -1;
-module_param(devnr, int, 0644);
-
-MODULE_AUTHOR("Gerd Knorr");
-MODULE_LICENSE("GPL");
-
-/* ----------------------------------------------------------------------- */
-
-struct TVMIXER {
-	struct i2c_client *dev;
-	int minor;
-	int count;
-};
-
-static struct TVMIXER devices[DEV_MAX];
-
-static int tvmixer_adapters(struct i2c_adapter *adap);
-static int tvmixer_clients(struct i2c_client *client);
-
-/* ----------------------------------------------------------------------- */
-
-static int mix_to_v4l(int i)
-{
-	int r;
-
-	r = ((i & 0xff) * 65536 + 50) / 100;
-	if (r > 65535) r = 65535;
-	if (r <     0) r =     0;
-	return r;
-}
-
-static int v4l_to_mix(int i)
-{
-	int r;
-
-	r = (i * 100 + 32768) / 65536;
-	if (r > 100) r = 100;
-	if (r <   0) r =   0;
-	return r | (r << 8);
-}
-
-static int v4l_to_mix2(int l, int r)
-{
-	r = (r * 100 + 32768) / 65536;
-	if (r > 100) r = 100;
-	if (r <   0) r =   0;
-	l = (l * 100 + 32768) / 65536;
-	if (l > 100) l = 100;
-	if (l <   0) l =   0;
-	return (r << 8) | l;
-}
-
-static int tvmixer_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct video_audio va;
-	int left,right,ret,val = 0;
-	struct TVMIXER *mix = file->private_data;
-	struct i2c_client *client = mix->dev;
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-
-	if (NULL == client)
-		return -ENODEV;
-
-	if (cmd == SOUND_MIXER_INFO) {
-		mixer_info info;
-		strlcpy(info.id, "tv card", sizeof(info.id));
-		strlcpy(info.name, client->name, sizeof(info.name));
-		info.modify_counter = 42 /* FIXME */;
-		if (copy_to_user(argp, &info, sizeof(info)))
-			return -EFAULT;
-		return 0;
-	}
-	if (cmd == SOUND_OLD_MIXER_INFO) {
-		_old_mixer_info info;
-		strlcpy(info.id, "tv card", sizeof(info.id));
-		strlcpy(info.name, client->name, sizeof(info.name));
-		if (copy_to_user(argp, &info, sizeof(info)))
-			return -EFAULT;
-		return 0;
-	}
-	if (cmd == OSS_GETVERSION)
-		return put_user(SOUND_VERSION, p);
-
-	if (_SIOC_DIR(cmd) & _SIOC_WRITE)
-		if (get_user(val, p))
-			return -EFAULT;
-
-	/* read state */
-	memset(&va,0,sizeof(va));
-	client->driver->command(client,VIDIOCGAUDIO,&va);
-
-	switch (cmd) {
-	case MIXER_READ(SOUND_MIXER_RECMASK):
-	case MIXER_READ(SOUND_MIXER_CAPS):
-	case MIXER_READ(SOUND_MIXER_RECSRC):
-	case MIXER_WRITE(SOUND_MIXER_RECSRC):
-		ret = 0;
-		break;
-
-	case MIXER_READ(SOUND_MIXER_STEREODEVS):
-		ret = SOUND_MASK_VOLUME;
-		break;
-	case MIXER_READ(SOUND_MIXER_DEVMASK):
-		ret = SOUND_MASK_VOLUME;
-		if (va.flags & VIDEO_AUDIO_BASS)
-			ret |= SOUND_MASK_BASS;
-		if (va.flags & VIDEO_AUDIO_TREBLE)
-			ret |= SOUND_MASK_TREBLE;
-		break;
-
-	case MIXER_WRITE(SOUND_MIXER_VOLUME):
-		left  = mix_to_v4l(val);
-		right = mix_to_v4l(val >> 8);
-		va.volume  = max(left,right);
-		va.balance = (32768*min(left,right)) / (va.volume ? va.volume : 1);
-		va.balance = (left<right) ? (65535-va.balance) : va.balance;
-		if (va.volume)
-			va.flags &= ~VIDEO_AUDIO_MUTE;
-		client->driver->command(client,VIDIOCSAUDIO,&va);
-		client->driver->command(client,VIDIOCGAUDIO,&va);
-		/* fall throuth */
-	case MIXER_READ(SOUND_MIXER_VOLUME):
-		left  = (min(65536 - va.balance,32768) *
-			 va.volume) / 32768;
-		right = (min(va.balance,(u16)32768) *
-			 va.volume) / 32768;
-		ret = v4l_to_mix2(left,right);
-		break;
-
-	case MIXER_WRITE(SOUND_MIXER_BASS):
-		va.bass = mix_to_v4l(val);
-		client->driver->command(client,VIDIOCSAUDIO,&va);
-		client->driver->command(client,VIDIOCGAUDIO,&va);
-		/* fall throuth  */
-	case MIXER_READ(SOUND_MIXER_BASS):
-		ret = v4l_to_mix(va.bass);
-		break;
-
-	case MIXER_WRITE(SOUND_MIXER_TREBLE):
-		va.treble = mix_to_v4l(val);
-		client->driver->command(client,VIDIOCSAUDIO,&va);
-		client->driver->command(client,VIDIOCGAUDIO,&va);
-		/* fall throuth */
-	case MIXER_READ(SOUND_MIXER_TREBLE):
-		ret = v4l_to_mix(va.treble);
-		break;
-
-	default:
-		return -EINVAL;
-	}
-	if (put_user(ret, p))
-		return -EFAULT;
-	return 0;
-}
-
-static int tvmixer_open(struct inode *inode, struct file *file)
-{
-	int i, minor = iminor(inode);
-	struct TVMIXER *mix = NULL;
-	struct i2c_client *client = NULL;
-
-	for (i = 0; i < DEV_MAX; i++) {
-		if (devices[i].minor == minor) {
-			mix = devices+i;
-			client = mix->dev;
-			break;
-		}
-	}
-
-	if (NULL == client)
-		return -ENODEV;
-
-	/* lock bttv in memory while the mixer is in use  */
-	file->private_data = mix;
-	if (client->adapter->owner)
-		try_module_get(client->adapter->owner);
-	return 0;
-}
-
-static int tvmixer_release(struct inode *inode, struct file *file)
-{
-	struct TVMIXER *mix = file->private_data;
-	struct i2c_client *client;
-
-	client = mix->dev;
-	if (NULL == client) {
-		return -ENODEV;
-	}
-
-	module_put(client->adapter->owner);
-	return 0;
-}
-
-static struct i2c_driver driver = {
-	.driver = {
-		.name    = "tvmixer",
-	},
-	.id              = I2C_DRIVERID_TVMIXER,
-	.detach_adapter  = tvmixer_adapters,
-	.attach_adapter  = tvmixer_adapters,
-	.detach_client   = tvmixer_clients,
-};
-
-static const struct file_operations tvmixer_fops = {
-	.owner		= THIS_MODULE,
-	.llseek         = no_llseek,
-	.ioctl          = tvmixer_ioctl,
-	.open           = tvmixer_open,
-	.release        = tvmixer_release,
-};
-
-/* ----------------------------------------------------------------------- */
-
-static int tvmixer_adapters(struct i2c_adapter *adap)
-{
-	struct i2c_client *client;
-
-	list_for_each_entry(client, &adap->clients, list)
-		tvmixer_clients(client);
-	return 0;
-}
-
-static int tvmixer_clients(struct i2c_client *client)
-{
-	struct video_audio va;
-	int i,minor;
-
-	if (!(client->adapter->class & I2C_CLASS_TV_ANALOG))
-		return -1;
-
-	/* unregister ?? */
-	for (i = 0; i < DEV_MAX; i++) {
-		if (devices[i].dev == client) {
-			/* unregister */
-			unregister_sound_mixer(devices[i].minor);
-			devices[i].dev = NULL;
-			devices[i].minor = -1;
-			printk("tvmixer: %s unregistered (#1)\n",
-			       client->name);
-			return 0;
-		}
-	}
-
-	/* look for a free slot */
-	for (i = 0; i < DEV_MAX; i++)
-		if (NULL == devices[i].dev)
-			break;
-	if (i == DEV_MAX) {
-		printk(KERN_WARNING "tvmixer: DEV_MAX too small\n");
-		return -1;
-	}
-
-	/* audio chip with mixer ??? */
-	if (NULL == client->driver->command)
-		return -1;
-	memset(&va,0,sizeof(va));
-	if (0 != client->driver->command(client,VIDIOCGAUDIO,&va))
-		return -1;
-	if (0 == (va.flags & VIDEO_AUDIO_VOLUME))
-		return -1;
-
-	/* everything is fine, register */
-	if ((minor = register_sound_mixer(&tvmixer_fops,devnr)) < 0) {
-		printk(KERN_ERR "tvmixer: cannot allocate mixer device\n");
-		return -1;
-	}
-
-	devices[i].minor = minor;
-	devices[i].count = 0;
-	devices[i].dev   = client;
-	printk("tvmixer: %s (%s) registered with minor %d\n",
-	       client->name,client->adapter->name,minor);
-
-	return 0;
-}
-
-/* ----------------------------------------------------------------------- */
-
-static int __init tvmixer_init_module(void)
-{
-	int i;
-
-	for (i = 0; i < DEV_MAX; i++)
-		devices[i].minor = -1;
-
-	return i2c_add_driver(&driver);
-}
-
-static void __exit tvmixer_cleanup_module(void)
-{
-	int i;
-
-	i2c_del_driver(&driver);
-	for (i = 0; i < DEV_MAX; i++) {
-		if (devices[i].minor != -1) {
-			unregister_sound_mixer(devices[i].minor);
-			printk("tvmixer: %s unregistered (#2)\n",
-			       devices[i].dev->name);
-		}
-	}
-}
-
-module_init(tvmixer_init_module);
-module_exit(tvmixer_cleanup_module);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/include/asm-powerpc/dma.h b/include/asm-powerpc/dma.h
index 7a4374bdbef4..a7e06e25c708 100644
--- a/include/asm-powerpc/dma.h
+++ b/include/asm-powerpc/dma.h
@@ -93,16 +93,6 @@
  *
  */
 
-/* see prep_setup_arch() for detailed informations */
-#if defined(CONFIG_SOUND_CS4232) && defined(CONFIG_PPC_PREP)
-extern long ppc_cs4232_dma, ppc_cs4232_dma2;
-#define SND_DMA1 ppc_cs4232_dma
-#define SND_DMA2 ppc_cs4232_dma2
-#else
-#define SND_DMA1 -1
-#define SND_DMA2 -1
-#endif
-
 /* 8237 DMA controllers */
 #define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
 #define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
@@ -269,24 +259,15 @@ static __inline__ void set_dma_page(unsigned int dmanr, int pagenr)
 		dma_outb(pagenr >> 8, DMA_HI_PAGE_3);
 		break;
 	case 5:
-		if (SND_DMA1 == 5 || SND_DMA2 == 5)
-			dma_outb(pagenr, DMA_LO_PAGE_5);
-		else
-			dma_outb(pagenr & 0xfe, DMA_LO_PAGE_5);
+		dma_outb(pagenr & 0xfe, DMA_LO_PAGE_5);
 		dma_outb(pagenr >> 8, DMA_HI_PAGE_5);
 		break;
 	case 6:
-		if (SND_DMA1 == 6 || SND_DMA2 == 6)
-			dma_outb(pagenr, DMA_LO_PAGE_6);
-		else
-			dma_outb(pagenr & 0xfe, DMA_LO_PAGE_6);
+		dma_outb(pagenr & 0xfe, DMA_LO_PAGE_6);
 		dma_outb(pagenr >> 8, DMA_HI_PAGE_6);
 		break;
 	case 7:
-		if (SND_DMA1 == 7 || SND_DMA2 == 7)
-			dma_outb(pagenr, DMA_LO_PAGE_7);
-		else
-			dma_outb(pagenr & 0xfe, DMA_LO_PAGE_7);
+		dma_outb(pagenr & 0xfe, DMA_LO_PAGE_7);
 		dma_outb(pagenr >> 8, DMA_HI_PAGE_7);
 		break;
 	}
@@ -302,12 +283,6 @@ static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int phys)
 			 ((dmanr & 3) << 1) + IO_DMA1_BASE);
 		dma_outb((phys >> 8) & 0xff,
 			 ((dmanr & 3) << 1) + IO_DMA1_BASE);
-	} else if (dmanr == SND_DMA1 || dmanr == SND_DMA2) {
-		dma_outb(phys & 0xff,
-			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
-		dma_outb((phys >> 8) & 0xff,
-			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
-		dma_outb((dmanr & 3), DMA2_EXT_REG);
 	} else {
 		dma_outb((phys >> 1) & 0xff,
 			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
@@ -334,11 +309,6 @@ static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
 			 ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
 		dma_outb((count >> 8) & 0xff,
 			 ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
-	} else if (dmanr == SND_DMA1 || dmanr == SND_DMA2) {
-		dma_outb(count & 0xff,
-			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
-		dma_outb((count >> 8) & 0xff,
-			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
 	} else {
 		dma_outb((count >> 1) & 0xff,
 			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
@@ -368,8 +338,7 @@ static __inline__ int get_dma_residue(unsigned int dmanr)
 	count = 1 + dma_inb(io_port);
 	count += dma_inb(io_port) << 8;
 
-	return (dmanr <= 3 || dmanr == SND_DMA1 || dmanr == SND_DMA2)
-	    ? count : (count << 1);
+	return (dmanr <= 3) ? count : (count << 1);
 }
 
 /* These are in kernel/dma.c: */
diff --git a/include/linux/ac97_codec.h b/include/linux/ac97_codec.h
index 22eb9367235a..0260c3e79fdd 100644
--- a/include/linux/ac97_codec.h
+++ b/include/linux/ac97_codec.h
@@ -326,11 +326,7 @@ struct ac97_ops
 #define AC97_DEFAULT_POWER_OFF 4 /* Needs warm reset to power up */
 };
 
-extern int ac97_read_proc (char *page_out, char **start, off_t off,
-			   int count, int *eof, void *data);
 extern int ac97_probe_codec(struct ac97_codec *);
-extern unsigned int ac97_set_adc_rate(struct ac97_codec *codec, unsigned int rate);
-extern unsigned int ac97_set_dac_rate(struct ac97_codec *codec, unsigned int rate);
 
 extern struct ac97_codec *ac97_alloc_codec(void);
 extern void ac97_release_codec(struct ac97_codec *codec);
@@ -363,7 +359,4 @@ struct ac97_quirk {
 	int type;               /* quirk type above */
 };
 
-struct pci_dev;
-extern int ac97_tune_hardware(struct pci_dev *pdev, struct ac97_quirk *quirk, int override);
-
 #endif /* _AC97_CODEC_H_ */
diff --git a/sound/oss/Makefile b/sound/oss/Makefile
index f883c4b676ab..1f86299fae40 100644
--- a/sound/oss/Makefile
+++ b/sound/oss/Makefile
@@ -6,7 +6,6 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_SOUND_OSS)		+= sound.o
-obj-$(CONFIG_SOUND_CS4232)	+= cs4232.o ad1848.o 
 
 # Please leave it as is, cause the link order is significant !
 
@@ -16,7 +15,6 @@ obj-$(CONFIG_SOUND_AEDSP16)	+= aedsp16.o
 obj-$(CONFIG_SOUND_PSS)		+= pss.o ad1848.o mpu401.o
 obj-$(CONFIG_SOUND_TRIX)	+= trix.o ad1848.o sb_lib.o uart401.o
 obj-$(CONFIG_SOUND_SSCAPE)	+= sscape.o ad1848.o mpu401.o
-obj-$(CONFIG_SOUND_CS4232)	+= cs4232.o uart401.o
 obj-$(CONFIG_SOUND_MSS)		+= ad1848.o
 obj-$(CONFIG_SOUND_PAS)		+= pas2.o sb.o sb_lib.o uart401.o
 obj-$(CONFIG_SOUND_SB)		+= sb.o sb_lib.o uart401.o
@@ -27,19 +25,12 @@ obj-$(CONFIG_SOUND_YM3812)	+= opl3.o
 obj-$(CONFIG_SOUND_VMIDI)	+= v_midi.o
 obj-$(CONFIG_SOUND_VIDC)	+= vidc_mod.o
 obj-$(CONFIG_SOUND_WAVEARTIST)	+= waveartist.o
-
-obj-$(CONFIG_SOUND_VIA82CXXX)	+= via82cxxx_audio.o ac97_codec.o
-ifeq ($(CONFIG_MIDI_VIA82CXXX),y)
-  obj-$(CONFIG_SOUND_VIA82CXXX) += sound.o uart401.o
-endif
 obj-$(CONFIG_SOUND_MSNDCLAS)	+= msnd.o msnd_classic.o
 obj-$(CONFIG_SOUND_MSNDPIN)	+= msnd.o msnd_pinnacle.o
 obj-$(CONFIG_SOUND_VWSND)	+= vwsnd.o
-obj-$(CONFIG_SOUND_ICH)		+= i810_audio.o ac97_codec.o
 obj-$(CONFIG_SOUND_AU1550_AC97)	+= au1550_ac97.o ac97_codec.o
 obj-$(CONFIG_SOUND_TRIDENT)	+= trident.o ac97_codec.o
 obj-$(CONFIG_SOUND_BCM_CS4297A)	+= swarm_cs4297a.o
-obj-$(CONFIG_SOUND_BT878)	+= btaudio.o
 
 obj-$(CONFIG_SOUND_WM97XX)	+= ac97_plugin_wm97xx.o
 
diff --git a/sound/oss/ac97_codec.c b/sound/oss/ac97_codec.c
index fef56cac06c8..87a672680761 100644
--- a/sound/oss/ac97_codec.c
+++ b/sound/oss/ac97_codec.c
@@ -189,42 +189,6 @@ static const struct {
 	{0x57454301, "Winbond 83971D",		&null_ops},
 };
 
-static const char *ac97_stereo_enhancements[] =
-{
-	/*   0 */ "No 3D Stereo Enhancement",
-	/*   1 */ "Analog Devices Phat Stereo",
-	/*   2 */ "Creative Stereo Enhancement",
-	/*   3 */ "National Semi 3D Stereo Enhancement",
-	/*   4 */ "YAMAHA Ymersion",
-	/*   5 */ "BBE 3D Stereo Enhancement",
-	/*   6 */ "Crystal Semi 3D Stereo Enhancement",
-	/*   7 */ "Qsound QXpander",
-	/*   8 */ "Spatializer 3D Stereo Enhancement",
-	/*   9 */ "SRS 3D Stereo Enhancement",
-	/*  10 */ "Platform Tech 3D Stereo Enhancement",
-	/*  11 */ "AKM 3D Audio",
-	/*  12 */ "Aureal Stereo Enhancement",
-	/*  13 */ "Aztech 3D Enhancement",
-	/*  14 */ "Binaura 3D Audio Enhancement",
-	/*  15 */ "ESS Technology Stereo Enhancement",
-	/*  16 */ "Harman International VMAx",
-	/*  17 */ "Nvidea 3D Stereo Enhancement",
-	/*  18 */ "Philips Incredible Sound",
-	/*  19 */ "Texas Instruments 3D Stereo Enhancement",
-	/*  20 */ "VLSI Technology 3D Stereo Enhancement",
-	/*  21 */ "TriTech 3D Stereo Enhancement",
-	/*  22 */ "Realtek 3D Stereo Enhancement",
-	/*  23 */ "Samsung 3D Stereo Enhancement",
-	/*  24 */ "Wolfson Microelectronics 3D Enhancement",
-	/*  25 */ "Delta Integration 3D Enhancement",
-	/*  26 */ "SigmaTel 3D Enhancement",
-	/*  27 */ "Winbond 3D Stereo Enhancement",
-	/*  28 */ "Rockwell 3D Stereo Enhancement",
-	/*  29 */ "Reserved 29",
-	/*  30 */ "Reserved 30",
-	/*  31 */ "Reserved 31"
-};
-
 /* this table has default mixer values for all OSS mixers. */
 static struct mixer_defaults {
 	int mixer;
@@ -614,83 +578,6 @@ static int ac97_mixer_ioctl(struct ac97_codec *codec, unsigned int cmd, unsigned
 	return -EINVAL;
 }
 
-/* entry point for /proc/driver/controller_vendor/ac97/%d */
-int ac97_read_proc (char *page, char **start, off_t off,
-		    int count, int *eof, void *data)
-{
-	int len = 0, cap, extid, val, id1, id2;
-	struct ac97_codec *codec;
-	int is_ac97_20 = 0;
-
-	if ((codec = data) == NULL)
-		return -ENODEV;
-
-	id1 = codec->codec_read(codec, AC97_VENDOR_ID1);
-	id2 = codec->codec_read(codec, AC97_VENDOR_ID2);
-	len += sprintf (page+len, "Vendor name      : %s\n", codec->name);
-	len += sprintf (page+len, "Vendor id        : %04X %04X\n", id1, id2);
-
-	extid = codec->codec_read(codec, AC97_EXTENDED_ID);
-	extid &= ~((1<<2)|(1<<4)|(1<<5)|(1<<10)|(1<<11)|(1<<12)|(1<<13));
-	len += sprintf (page+len, "AC97 Version     : %s\n",
-			extid ? "2.0 or later" : "1.0");
-	if (extid) is_ac97_20 = 1;
-
-	cap = codec->codec_read(codec, AC97_RESET);
-	len += sprintf (page+len, "Capabilities     :%s%s%s%s%s%s\n",
-			cap & 0x0001 ? " -dedicated MIC PCM IN channel-" : "",
-			cap & 0x0002 ? " -reserved1-" : "",
-			cap & 0x0004 ? " -bass & treble-" : "",
-			cap & 0x0008 ? " -simulated stereo-" : "",
-			cap & 0x0010 ? " -headphone out-" : "",
-			cap & 0x0020 ? " -loudness-" : "");
-	val = cap & 0x00c0;
-	len += sprintf (page+len, "DAC resolutions  :%s%s%s\n",
-			" -16-bit-",
-			val & 0x0040 ? " -18-bit-" : "",
-			val & 0x0080 ? " -20-bit-" : "");
-	val = cap & 0x0300;
-	len += sprintf (page+len, "ADC resolutions  :%s%s%s\n",
-			" -16-bit-",
-			val & 0x0100 ? " -18-bit-" : "",
-			val & 0x0200 ? " -20-bit-" : "");
-	len += sprintf (page+len, "3D enhancement   : %s\n",
-			ac97_stereo_enhancements[(cap >> 10) & 0x1f]);
-
-	val = codec->codec_read(codec, AC97_GENERAL_PURPOSE);
-	len += sprintf (page+len, "POP path         : %s 3D\n"
-			"Sim. stereo      : %s\n"
-			"3D enhancement   : %s\n"
-			"Loudness         : %s\n"
-			"Mono output      : %s\n"
-			"MIC select       : %s\n"
-			"ADC/DAC loopback : %s\n",
-			val & 0x8000 ? "post" : "pre",
-			val & 0x4000 ? "on" : "off",
-			val & 0x2000 ? "on" : "off",
-			val & 0x1000 ? "on" : "off",
-			val & 0x0200 ? "MIC" : "MIX",
-			val & 0x0100 ? "MIC2" : "MIC1",
-			val & 0x0080 ? "on" : "off");
-
-	extid = codec->codec_read(codec, AC97_EXTENDED_ID);
-	cap = extid;
-	len += sprintf (page+len, "Ext Capabilities :%s%s%s%s%s%s%s\n",
-			cap & 0x0001 ? " -var rate PCM audio-" : "",
-			cap & 0x0002 ? " -2x PCM audio out-" : "",
-			cap & 0x0008 ? " -var rate MIC in-" : "",
-			cap & 0x0040 ? " -PCM center DAC-" : "",
-			cap & 0x0080 ? " -PCM surround DAC-" : "",
-			cap & 0x0100 ? " -PCM LFE DAC-" : "",
-			cap & 0x0200 ? " -slot/DAC mappings-" : "");
-	if (is_ac97_20) {
-		len += sprintf (page+len, "Front DAC rate   : %d\n",
-				codec->codec_read(codec, AC97_PCM_FRONT_DAC_RATE));
-	}
-
-	return len;
-}
-
 /**
  *	codec_id	-  Turn id1/id2 into a PnP string
  *	@id1: Vendor ID1
@@ -1313,176 +1200,5 @@ static int pt101_init(struct ac97_codec * codec)
 #endif
 	
 
-EXPORT_SYMBOL(ac97_read_proc);
 EXPORT_SYMBOL(ac97_probe_codec);
 
-/*
- *	AC97 library support routines
- */	
- 
-/**
- *	ac97_set_dac_rate	-	set codec rate adaption
- *	@codec: ac97 code
- *	@rate: rate in hertz
- *
- *	Set the DAC rate. Assumes the codec supports VRA. The caller is
- *	expected to have checked this little detail.
- */
- 
-unsigned int ac97_set_dac_rate(struct ac97_codec *codec, unsigned int rate)
-{
-	unsigned int new_rate = rate;
-	u32 dacp;
-	u32 mast_vol, phone_vol, mono_vol, pcm_vol;
-	u32 mute_vol = 0x8000;	/* The mute volume? */
-
-	if(rate != codec->codec_read(codec, AC97_PCM_FRONT_DAC_RATE))
-	{
-		/* Mute several registers */
-		mast_vol = codec->codec_read(codec, AC97_MASTER_VOL_STEREO);
-		mono_vol = codec->codec_read(codec, AC97_MASTER_VOL_MONO);
-		phone_vol = codec->codec_read(codec, AC97_HEADPHONE_VOL);
-		pcm_vol = codec->codec_read(codec, AC97_PCMOUT_VOL);
-		codec->codec_write(codec, AC97_MASTER_VOL_STEREO, mute_vol);
-		codec->codec_write(codec, AC97_MASTER_VOL_MONO, mute_vol);
-		codec->codec_write(codec, AC97_HEADPHONE_VOL, mute_vol);
-		codec->codec_write(codec, AC97_PCMOUT_VOL, mute_vol);
-		
-		/* Power down the DAC */
-		dacp=codec->codec_read(codec, AC97_POWER_CONTROL);
-		codec->codec_write(codec, AC97_POWER_CONTROL, dacp|0x0200);
-		/* Load the rate and read the effective rate */
-		codec->codec_write(codec, AC97_PCM_FRONT_DAC_RATE, rate);
-		new_rate=codec->codec_read(codec, AC97_PCM_FRONT_DAC_RATE);
-		/* Power it back up */
-		codec->codec_write(codec, AC97_POWER_CONTROL, dacp);
-
-		/* Restore volumes */
-		codec->codec_write(codec, AC97_MASTER_VOL_STEREO, mast_vol);
-		codec->codec_write(codec, AC97_MASTER_VOL_MONO, mono_vol);
-		codec->codec_write(codec, AC97_HEADPHONE_VOL, phone_vol);
-		codec->codec_write(codec, AC97_PCMOUT_VOL, pcm_vol);
-	}
-	return new_rate;
-}
-
-EXPORT_SYMBOL(ac97_set_dac_rate);
-
-/**
- *	ac97_set_adc_rate	-	set codec rate adaption
- *	@codec: ac97 code
- *	@rate: rate in hertz
- *
- *	Set the ADC rate. Assumes the codec supports VRA. The caller is
- *	expected to have checked this little detail.
- */
-
-unsigned int ac97_set_adc_rate(struct ac97_codec *codec, unsigned int rate)
-{
-	unsigned int new_rate = rate;
-	u32 dacp;
-
-	if(rate != codec->codec_read(codec, AC97_PCM_LR_ADC_RATE))
-	{
-		/* Power down the ADC */
-		dacp=codec->codec_read(codec, AC97_POWER_CONTROL);
-		codec->codec_write(codec, AC97_POWER_CONTROL, dacp|0x0100);
-		/* Load the rate and read the effective rate */
-		codec->codec_write(codec, AC97_PCM_LR_ADC_RATE, rate);
-		new_rate=codec->codec_read(codec, AC97_PCM_LR_ADC_RATE);
-		/* Power it back up */
-		codec->codec_write(codec, AC97_POWER_CONTROL, dacp);
-	}
-	return new_rate;
-}
-
-EXPORT_SYMBOL(ac97_set_adc_rate);
-
-static int swap_headphone(int remove_master)
-{
-	struct list_head *l;
-	struct ac97_codec *c;
-	
-	if (remove_master) {
-		mutex_lock(&codec_mutex);
-		list_for_each(l, &codecs)
-		{
-			c = list_entry(l, struct ac97_codec, list);
-			if (supported_mixer(c, SOUND_MIXER_PHONEOUT))
-				c->supported_mixers &= ~SOUND_MASK_PHONEOUT;
-		}
-		mutex_unlock(&codec_mutex);
-	} else
-		ac97_hw[SOUND_MIXER_PHONEOUT].offset = AC97_MASTER_VOL_STEREO;
-
-	/* Scale values already match */
-	ac97_hw[SOUND_MIXER_VOLUME].offset = AC97_MASTER_VOL_MONO;
-	return 0;
-}
-
-static int apply_quirk(int quirk)
-{
-	switch (quirk) {
-	case AC97_TUNE_NONE:
-		return 0;
-	case AC97_TUNE_HP_ONLY:
-		return swap_headphone(1);
-	case AC97_TUNE_SWAP_HP:
-		return swap_headphone(0);
-	case AC97_TUNE_SWAP_SURROUND:
-		return -ENOSYS; /* not yet implemented */
-	case AC97_TUNE_AD_SHARING:
-		return -ENOSYS; /* not yet implemented */
-	case AC97_TUNE_ALC_JACK:
-		return -ENOSYS; /* not yet implemented */
-	}
-	return -EINVAL;
-}
-
-/**
- *	ac97_tune_hardware - tune up the hardware
- *	@pdev: pci_dev pointer
- *	@quirk: quirk list
- *	@override: explicit quirk value (overrides if not AC97_TUNE_DEFAULT)
- *
- *	Do some workaround for each pci device, such as renaming of the
- *	headphone (true line-out) control as "Master".
- *	The quirk-list must be terminated with a zero-filled entry.
- *
- *	Returns zero if successful, or a negative error code on failure.
- */
-
-int ac97_tune_hardware(struct pci_dev *pdev, struct ac97_quirk *quirk, int override)
-{
-	int result;
-
-	if (!quirk)
-		return -EINVAL;
-
-	if (override != AC97_TUNE_DEFAULT) {
-		result = apply_quirk(override);
-		if (result < 0)
-			printk(KERN_ERR "applying quirk type %d failed (%d)\n", override, result);
-		return result;
-	}
-
-	for (; quirk->vendor; quirk++) {
-		if (quirk->vendor != pdev->subsystem_vendor)
-			continue;
-		if ((! quirk->mask && quirk->device == pdev->subsystem_device) ||
-		    quirk->device == (quirk->mask & pdev->subsystem_device)) {
-#ifdef DEBUG
-			printk("ac97 quirk for %s (%04x:%04x)\n", quirk->name, ac97->subsystem_vendor, pdev->subsystem_device);
-#endif
-			result = apply_quirk(quirk->type);
-			if (result < 0)
-				printk(KERN_ERR "applying quirk type %d for %s failed (%d)\n", quirk->type, quirk->name, result);
-			return result;
-		}
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ac97_tune_hardware);
-
-MODULE_LICENSE("GPL");
diff --git a/sound/oss/btaudio.c b/sound/oss/btaudio.c
deleted file mode 100644
index 4d5cf05b8922..000000000000
--- a/sound/oss/btaudio.c
+++ /dev/null
@@ -1,1139 +0,0 @@
-/*
-    btaudio - bt878 audio dma driver for linux 2.4.x
-
-    (c) 2000-2002 Gerd Knorr <kraxel@bytesex.org>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-*/
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/sound.h>
-#include <linux/soundcard.h>
-#include <linux/slab.h>
-#include <linux/kdev_t.h>
-#include <linux/mutex.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-
-/* mmio access */
-#define btwrite(dat,adr)    writel((dat), (bta->mmio+(adr)))
-#define btread(adr)         readl(bta->mmio+(adr))
-
-#define btand(dat,adr)      btwrite((dat) & btread(adr), adr)
-#define btor(dat,adr)       btwrite((dat) | btread(adr), adr)
-#define btaor(dat,mask,adr) btwrite((dat) | ((mask) & btread(adr)), adr)
-
-/* registers (shifted because bta->mmio is long) */
-#define REG_INT_STAT      (0x100 >> 2)
-#define REG_INT_MASK      (0x104 >> 2)
-#define REG_GPIO_DMA_CTL  (0x10c >> 2)
-#define REG_PACKET_LEN    (0x110 >> 2)
-#define REG_RISC_STRT_ADD (0x114 >> 2)
-#define REG_RISC_COUNT    (0x120 >> 2)
-
-/* IRQ bits - REG_INT_(STAT|MASK) */
-#define IRQ_SCERR         (1 << 19)
-#define IRQ_OCERR         (1 << 18)
-#define IRQ_PABORT        (1 << 17)
-#define IRQ_RIPERR        (1 << 16)
-#define IRQ_PPERR         (1 << 15)
-#define IRQ_FDSR          (1 << 14)
-#define IRQ_FTRGT         (1 << 13)
-#define IRQ_FBUS          (1 << 12)
-#define IRQ_RISCI         (1 << 11)
-#define IRQ_OFLOW         (1 <<  3)
-
-#define IRQ_BTAUDIO       (IRQ_SCERR | IRQ_OCERR | IRQ_PABORT | IRQ_RIPERR |\
-			   IRQ_PPERR | IRQ_FDSR  | IRQ_FTRGT  | IRQ_FBUS   |\
-			   IRQ_RISCI)
-
-/* REG_GPIO_DMA_CTL bits */
-#define DMA_CTL_A_PWRDN   (1 << 26)
-#define DMA_CTL_DA_SBR    (1 << 14)
-#define DMA_CTL_DA_ES2    (1 << 13)
-#define DMA_CTL_ACAP_EN   (1 <<  4)
-#define DMA_CTL_RISC_EN   (1 <<  1)
-#define DMA_CTL_FIFO_EN   (1 <<  0)
-
-/* RISC instructions */
-#define RISC_WRITE        (0x01 << 28)
-#define RISC_JUMP         (0x07 << 28)
-#define RISC_SYNC         (0x08 << 28)
-
-/* RISC bits */
-#define RISC_WR_SOL       (1 << 27)
-#define RISC_WR_EOL       (1 << 26)
-#define RISC_IRQ          (1 << 24)
-#define RISC_SYNC_RESYNC  (1 << 15)
-#define RISC_SYNC_FM1     0x06
-#define RISC_SYNC_VRO     0x0c
-
-#define HWBASE_AD (448000)
-
-/* -------------------------------------------------------------- */
-
-struct btaudio {
-	/* linked list */
-	struct btaudio *next;
-
-	/* device info */
-	int            dsp_digital;
-	int            dsp_analog;
-	int            mixer_dev;
-	struct pci_dev *pci;
-	unsigned int   irq;
-	unsigned long  mem;
-	unsigned long  __iomem *mmio;
-
-	/* locking */
-	int            users;
-	struct mutex lock;
-
-	/* risc instructions */
-	unsigned int   risc_size;
-	unsigned long  *risc_cpu;
-	dma_addr_t     risc_dma;
-
-	/* audio data */
-	unsigned int   buf_size;
-	unsigned char  *buf_cpu;
-	dma_addr_t     buf_dma;
-
-	/* buffer setup */
-	int line_bytes;
-	int line_count;
-	int block_bytes;
-	int block_count;
-
-	/* read fifo management */
-	int recording;
-	int dma_block;
-	int read_offset;
-	int read_count;
-	wait_queue_head_t readq;
-
-	/* settings */
-	int gain[3];
-	int source;
-	int bits;
-	int decimation;
-	int mixcount;
-	int sampleshift;
-	int channels;
-	int analog;
-	int rate;
-};
-
-struct cardinfo {
-	char *name;
-	int rate;
-};
-
-static struct btaudio *btaudios;
-static unsigned int debug;
-static unsigned int irq_debug;
-
-/* -------------------------------------------------------------- */
-
-#define BUF_DEFAULT 128*1024
-#define BUF_MIN         8192
-
-static int alloc_buffer(struct btaudio *bta)
-{
-	if (NULL == bta->buf_cpu) {
-		for (bta->buf_size = BUF_DEFAULT; bta->buf_size >= BUF_MIN;
-		     bta->buf_size = bta->buf_size >> 1) {
-			bta->buf_cpu = pci_alloc_consistent
-				(bta->pci, bta->buf_size, &bta->buf_dma);
-			if (NULL != bta->buf_cpu)
-				break;
-		}
-		if (NULL == bta->buf_cpu)
-			return -ENOMEM;
-		memset(bta->buf_cpu,0,bta->buf_size);
-	}
-	if (NULL == bta->risc_cpu) {
-		bta->risc_size = PAGE_SIZE;
-		bta->risc_cpu = pci_alloc_consistent
-			(bta->pci, bta->risc_size, &bta->risc_dma);
-		if (NULL == bta->risc_cpu) {
-			pci_free_consistent(bta->pci, bta->buf_size, bta->buf_cpu, bta->buf_dma);
-			bta->buf_cpu = NULL;
-			return -ENOMEM;
-		}
-	}
-	return 0;
-}
-
-static void free_buffer(struct btaudio *bta)
-{
-	if (NULL != bta->buf_cpu) {
-		pci_free_consistent(bta->pci, bta->buf_size,
-				    bta->buf_cpu, bta->buf_dma);
-		bta->buf_cpu = NULL;
-	}
-	if (NULL != bta->risc_cpu) {
-		pci_free_consistent(bta->pci, bta->risc_size,
-				    bta->risc_cpu, bta->risc_dma);
-		bta->risc_cpu = NULL;
-	}
-}
-
-static int make_risc(struct btaudio *bta)
-{
-	int rp, bp, line, block;
-	unsigned long risc;
-
-	bta->block_bytes = bta->buf_size >> 4;
-	bta->block_count = 1 << 4;
-	bta->line_bytes  = bta->block_bytes;
-	bta->line_count  = bta->block_count;
-	while (bta->line_bytes > 4095) {
-		bta->line_bytes >>= 1;
-		bta->line_count <<= 1;
-	}
-	if (bta->line_count > 255)
-		return -EINVAL;
-	if (debug)
-		printk(KERN_DEBUG
-		       "btaudio: bufsize=%d - bs=%d bc=%d - ls=%d, lc=%d\n",
-		       bta->buf_size,bta->block_bytes,bta->block_count,
-		       bta->line_bytes,bta->line_count);
-        rp = 0; bp = 0;
-	block = 0;
-	bta->risc_cpu[rp++] = cpu_to_le32(RISC_SYNC|RISC_SYNC_FM1);
-	bta->risc_cpu[rp++] = cpu_to_le32(0);
-	for (line = 0; line < bta->line_count; line++) {
-		risc  = RISC_WRITE | RISC_WR_SOL | RISC_WR_EOL;
-		risc |= bta->line_bytes;
-		if (0 == (bp & (bta->block_bytes-1))) {
-			risc |= RISC_IRQ;
-			risc |= (block  & 0x0f) << 16;
-			risc |= (~block & 0x0f) << 20;
-			block++;
-		}
-		bta->risc_cpu[rp++] = cpu_to_le32(risc);
-		bta->risc_cpu[rp++] = cpu_to_le32(bta->buf_dma + bp);
-		bp += bta->line_bytes;
-	}
-	bta->risc_cpu[rp++] = cpu_to_le32(RISC_SYNC|RISC_SYNC_VRO);
-	bta->risc_cpu[rp++] = cpu_to_le32(0);
-	bta->risc_cpu[rp++] = cpu_to_le32(RISC_JUMP); 
-	bta->risc_cpu[rp++] = cpu_to_le32(bta->risc_dma);
-	return 0;
-}
-
-static int start_recording(struct btaudio *bta)
-{
-	int ret;
-
-	if (0 != (ret = alloc_buffer(bta)))
-		return ret;
-	if (0 != (ret = make_risc(bta)))
-		return ret;
-
-	btwrite(bta->risc_dma, REG_RISC_STRT_ADD);
-	btwrite((bta->line_count << 16) | bta->line_bytes,
-		REG_PACKET_LEN);
-	btwrite(IRQ_BTAUDIO, REG_INT_MASK);
-	if (bta->analog) {
-		btwrite(DMA_CTL_ACAP_EN |
-			DMA_CTL_RISC_EN |
-			DMA_CTL_FIFO_EN |
-			DMA_CTL_DA_ES2  |
-			((bta->bits == 8) ? DMA_CTL_DA_SBR : 0) |
-			(bta->gain[bta->source] << 28) |
-			(bta->source            << 24) |
-			(bta->decimation        <<  8),
-			REG_GPIO_DMA_CTL);
-	} else {
-		btwrite(DMA_CTL_ACAP_EN |
-			DMA_CTL_RISC_EN |
-			DMA_CTL_FIFO_EN |
-			DMA_CTL_DA_ES2  |
-			DMA_CTL_A_PWRDN |
-			(1 << 6)   |
-			((bta->bits == 8) ? DMA_CTL_DA_SBR : 0) |
-			(bta->gain[bta->source] << 28) |
-			(bta->source            << 24) |
-			(bta->decimation        <<  8),
-			REG_GPIO_DMA_CTL);
-	}
-	bta->dma_block = 0;
-	bta->read_offset = 0;
-	bta->read_count = 0;
-	bta->recording = 1;
-	if (debug)
-		printk(KERN_DEBUG "btaudio: recording started\n");
-	return 0;
-}
-
-static void stop_recording(struct btaudio *bta)
-{
-        btand(~15, REG_GPIO_DMA_CTL);
-	bta->recording = 0;
-	if (debug)
-		printk(KERN_DEBUG "btaudio: recording stopped\n");
-}
-
-
-/* -------------------------------------------------------------- */
-
-static int btaudio_mixer_open(struct inode *inode, struct file *file)
-{
-	int minor = iminor(inode);
-	struct btaudio *bta;
-
-	for (bta = btaudios; bta != NULL; bta = bta->next)
-		if (bta->mixer_dev == minor)
-			break;
-	if (NULL == bta)
-		return -ENODEV;
-
-	if (debug)
-		printk("btaudio: open mixer [%d]\n",minor);
-	file->private_data = bta;
-	return 0;
-}
-
-static int btaudio_mixer_release(struct inode *inode, struct file *file)
-{
-	return 0;
-}
-
-static int btaudio_mixer_ioctl(struct inode *inode, struct file *file,
-			       unsigned int cmd, unsigned long arg)
-{
-	struct btaudio *bta = file->private_data;
-	int ret,val=0,i=0;
-	void __user *argp = (void __user *)arg;
-
-	if (cmd == SOUND_MIXER_INFO) {
-		mixer_info info;
-		memset(&info,0,sizeof(info));
-                strlcpy(info.id,"bt878",sizeof(info.id));
-                strlcpy(info.name,"Brooktree Bt878 audio",sizeof(info.name));
-                info.modify_counter = bta->mixcount;
-                if (copy_to_user(argp, &info, sizeof(info)))
-                        return -EFAULT;
-		return 0;
-	}
-	if (cmd == SOUND_OLD_MIXER_INFO) {
-		_old_mixer_info info;
-		memset(&info,0,sizeof(info));
-                strlcpy(info.id, "bt878", sizeof(info.id));
-                strlcpy(info.name,"Brooktree Bt878 audio",sizeof(info.name));
-                if (copy_to_user(argp, &info, sizeof(info)))
-                        return -EFAULT;
-		return 0;
-	}
-	if (cmd == OSS_GETVERSION)
-		return put_user(SOUND_VERSION, (int __user *)argp);
-
-	/* read */
-	if (_SIOC_DIR(cmd) & _SIOC_WRITE)
-		if (get_user(val, (int __user *)argp))
-			return -EFAULT;
-
-	switch (cmd) {
-	case MIXER_READ(SOUND_MIXER_CAPS):
-		ret = SOUND_CAP_EXCL_INPUT;
-		break;
-	case MIXER_READ(SOUND_MIXER_STEREODEVS):
-		ret = 0;
-		break;
-	case MIXER_READ(SOUND_MIXER_RECMASK):
-	case MIXER_READ(SOUND_MIXER_DEVMASK):
-		ret = SOUND_MASK_LINE1|SOUND_MASK_LINE2|SOUND_MASK_LINE3;
-		break;
-
-	case MIXER_WRITE(SOUND_MIXER_RECSRC):
-		if (val & SOUND_MASK_LINE1 && bta->source != 0)
-			bta->source = 0;
-		else if (val & SOUND_MASK_LINE2 && bta->source != 1)
-			bta->source = 1;
-		else if (val & SOUND_MASK_LINE3 && bta->source != 2)
-			bta->source = 2;
-		btaor((bta->gain[bta->source] << 28) |
-		      (bta->source            << 24),
-		      0x0cffffff, REG_GPIO_DMA_CTL);
-	case MIXER_READ(SOUND_MIXER_RECSRC):
-		switch (bta->source) {
-		case 0:  ret = SOUND_MASK_LINE1; break;
-		case 1:  ret = SOUND_MASK_LINE2; break;
-		case 2:  ret = SOUND_MASK_LINE3; break;
-		default: ret = 0;
-		}
-		break;
-
-	case MIXER_WRITE(SOUND_MIXER_LINE1):
-	case MIXER_WRITE(SOUND_MIXER_LINE2):
-	case MIXER_WRITE(SOUND_MIXER_LINE3):
-		if (MIXER_WRITE(SOUND_MIXER_LINE1) == cmd)
-			i = 0;
-		if (MIXER_WRITE(SOUND_MIXER_LINE2) == cmd)
-			i = 1;
-		if (MIXER_WRITE(SOUND_MIXER_LINE3) == cmd)
-			i = 2;
-		bta->gain[i] = (val & 0xff) * 15 / 100;
-		if (bta->gain[i] > 15) bta->gain[i] = 15;
-		if (bta->gain[i] <  0) bta->gain[i] =  0;
-		if (i == bta->source)
-			btaor((bta->gain[bta->source]<<28),
-			      0x0fffffff, REG_GPIO_DMA_CTL);
-		ret  = bta->gain[i] * 100 / 15;
-		ret |= ret << 8;
-		break;
-
-	case MIXER_READ(SOUND_MIXER_LINE1):
-	case MIXER_READ(SOUND_MIXER_LINE2):
-	case MIXER_READ(SOUND_MIXER_LINE3):
-		if (MIXER_READ(SOUND_MIXER_LINE1) == cmd)
-			i = 0;
-		if (MIXER_READ(SOUND_MIXER_LINE2) == cmd)
-			i = 1;
-		if (MIXER_READ(SOUND_MIXER_LINE3) == cmd)
-			i = 2;
-		ret  = bta->gain[i] * 100 / 15;
-		ret |= ret << 8;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-	if (put_user(ret, (int __user *)argp))
-		return -EFAULT;
-	return 0;
-}
-
-static const struct file_operations btaudio_mixer_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.open		= btaudio_mixer_open,
-	.release	= btaudio_mixer_release,
-	.ioctl		= btaudio_mixer_ioctl,
-};
-
-/* -------------------------------------------------------------- */
-
-static int btaudio_dsp_open(struct inode *inode, struct file *file,
-			    struct btaudio *bta, int analog)
-{
-	mutex_lock(&bta->lock);
-	if (bta->users)
-		goto busy;
-	bta->users++;
-	file->private_data = bta;
-
-	bta->analog = analog;
-	bta->dma_block = 0;
-	bta->read_offset = 0;
-	bta->read_count = 0;
-	bta->sampleshift = 0;
-
-	mutex_unlock(&bta->lock);
-	return 0;
-
- busy:
-	mutex_unlock(&bta->lock);
-	return -EBUSY;
-}
-
-static int btaudio_dsp_open_digital(struct inode *inode, struct file *file)
-{
-	int minor = iminor(inode);
-	struct btaudio *bta;
-
-	for (bta = btaudios; bta != NULL; bta = bta->next)
-		if (bta->dsp_digital == minor)
-			break;
-	if (NULL == bta)
-		return -ENODEV;
-	
-	if (debug)
-		printk("btaudio: open digital dsp [%d]\n",minor);
-	return btaudio_dsp_open(inode,file,bta,0);
-}
-
-static int btaudio_dsp_open_analog(struct inode *inode, struct file *file)
-{
-	int minor = iminor(inode);
-	struct btaudio *bta;
-
-	for (bta = btaudios; bta != NULL; bta = bta->next)
-		if (bta->dsp_analog == minor)
-			break;
-	if (NULL == bta)
-		return -ENODEV;
-
-	if (debug)
-		printk("btaudio: open analog dsp [%d]\n",minor);
-	return btaudio_dsp_open(inode,file,bta,1);
-}
-
-static int btaudio_dsp_release(struct inode *inode, struct file *file)
-{
-	struct btaudio *bta = file->private_data;
-
-	mutex_lock(&bta->lock);
-	if (bta->recording)
-		stop_recording(bta);
-	bta->users--;
-	mutex_unlock(&bta->lock);
-	return 0;
-}
-
-static ssize_t btaudio_dsp_read(struct file *file, char __user *buffer,
-				size_t swcount, loff_t *ppos)
-{
-	struct btaudio *bta = file->private_data;
-	int hwcount = swcount << bta->sampleshift;
-	int nsrc, ndst, err, ret = 0;
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(&bta->readq, &wait);
-	mutex_lock(&bta->lock);
-	while (swcount > 0) {
-		if (0 == bta->read_count) {
-			if (!bta->recording) {
-				if (0 != (err = start_recording(bta))) {
-					if (0 == ret)
-						ret = err;
-					break;
-				}
-			}
-			if (file->f_flags & O_NONBLOCK) {
-				if (0 == ret)
-					ret = -EAGAIN;
-				break;
-			}
-			mutex_unlock(&bta->lock);
-			current->state = TASK_INTERRUPTIBLE;
-			schedule();
-			mutex_lock(&bta->lock);
-			if(signal_pending(current)) {
-				if (0 == ret)
-					ret = -EINTR;
-				break;
-			}
-		}
-		nsrc = (bta->read_count < hwcount) ? bta->read_count : hwcount;
-		if (nsrc > bta->buf_size - bta->read_offset)
-			nsrc = bta->buf_size - bta->read_offset;
-		ndst = nsrc >> bta->sampleshift;
-		
-		if ((bta->analog  && 0 == bta->sampleshift) ||
-		    (!bta->analog && 2 == bta->channels)) {
-			/* just copy */
-			if (copy_to_user(buffer + ret, bta->buf_cpu + bta->read_offset, nsrc)) {
-				if (0 == ret)
-					ret = -EFAULT;
-				break;
-			}
-
-		} else if (!bta->analog) {
-			/* stereo => mono (digital audio) */
-			__s16 *src = (__s16*)(bta->buf_cpu + bta->read_offset);
-			__s16 __user *dst = (__s16 __user *)(buffer + ret);
-			__s16 avg;
-			int n = ndst>>1;
-			if (!access_ok(VERIFY_WRITE, dst, ndst)) {
-				if (0 == ret)
-					ret = -EFAULT;
-				break;
-			}
-			for (; n; n--, dst++) {
-				avg  = (__s16)le16_to_cpu(*src) / 2; src++;
-				avg += (__s16)le16_to_cpu(*src) / 2; src++;
-				__put_user(cpu_to_le16(avg),dst);
-			}
-
-		} else if (8 == bta->bits) {
-			/* copy + byte downsampling (audio A/D) */
-			__u8 *src = bta->buf_cpu + bta->read_offset;
-			__u8 __user *dst = buffer + ret;
-			int n = ndst;
-			if (!access_ok(VERIFY_WRITE, dst, ndst)) {
-				if (0 == ret)
-					ret = -EFAULT;
-				break;
-			}
-			for (; n; n--, src += (1 << bta->sampleshift), dst++)
-				__put_user(*src, dst);
-
-		} else {
-			/* copy + word downsampling (audio A/D) */
-			__u16 *src = (__u16*)(bta->buf_cpu + bta->read_offset);
-			__u16 __user *dst = (__u16 __user *)(buffer + ret);
-			int n = ndst>>1;
-			if (!access_ok(VERIFY_WRITE,dst,ndst)) {
-				if (0 == ret)
-					ret = -EFAULT;
-				break;
-			}
-			for (; n; n--, src += (1 << bta->sampleshift), dst++)
-				__put_user(*src, dst);
-		}
-
-		ret     += ndst;
-		swcount -= ndst;
-		hwcount -= nsrc;
-		bta->read_count  -= nsrc;
-		bta->read_offset += nsrc;
-		if (bta->read_offset == bta->buf_size)
-			bta->read_offset = 0;
-	}
-	mutex_unlock(&bta->lock);
-	remove_wait_queue(&bta->readq, &wait);
-	current->state = TASK_RUNNING;
-	return ret;
-}
-
-static ssize_t btaudio_dsp_write(struct file *file, const char __user *buffer,
-				 size_t count, loff_t *ppos)
-{
-	return -EINVAL;
-}
-
-static int btaudio_dsp_ioctl(struct inode *inode, struct file *file,
-			     unsigned int cmd, unsigned long arg)
-{
-	struct btaudio *bta = file->private_data;
-	int s, i, ret, val = 0;
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	
-        switch (cmd) {
-        case OSS_GETVERSION:
-                return put_user(SOUND_VERSION, p);
-        case SNDCTL_DSP_GETCAPS:
-		return 0;
-
-        case SNDCTL_DSP_SPEED:
-		if (get_user(val, p))
-			return -EFAULT;
-		if (bta->analog) {
-			for (s = 0; s < 16; s++)
-				if (val << s >= HWBASE_AD*4/15)
-					break;
-			for (i = 15; i >= 5; i--)
-				if (val << s <= HWBASE_AD*4/i)
-					break;
-			bta->sampleshift = s;
-			bta->decimation  = i;
-			if (debug)
-				printk(KERN_DEBUG "btaudio: rate: req=%d  "
-				       "dec=%d shift=%d hwrate=%d swrate=%d\n",
-				       val,i,s,(HWBASE_AD*4/i),(HWBASE_AD*4/i)>>s);
-		} else {
-			bta->sampleshift = (bta->channels == 2) ? 0 : 1;
-			bta->decimation  = 0;
-		}
-		if (bta->recording) {
-			mutex_lock(&bta->lock);
-			stop_recording(bta);
-			start_recording(bta);
-			mutex_unlock(&bta->lock);
-		}
-		/* fall through */
-        case SOUND_PCM_READ_RATE:
-		if (bta->analog) {
-			return put_user(HWBASE_AD*4/bta->decimation>>bta->sampleshift, p);
-		} else {
-			return put_user(bta->rate, p);
-		}
-
-        case SNDCTL_DSP_STEREO:
-		if (!bta->analog) {
-			if (get_user(val, p))
-				return -EFAULT;
-			bta->channels    = (val > 0) ? 2 : 1;
-			bta->sampleshift = (bta->channels == 2) ? 0 : 1;
-			if (debug)
-				printk(KERN_INFO
-				       "btaudio: stereo=%d channels=%d\n",
-				       val,bta->channels);
-		} else {
-			if (val == 1)
-				return -EFAULT;
-			else {
-				bta->channels = 1;
-				if (debug)
-					printk(KERN_INFO
-					       "btaudio: stereo=0 channels=1\n");
-			}
-		}
-		return put_user((bta->channels)-1, p);
-
-        case SNDCTL_DSP_CHANNELS:
-		if (!bta->analog) {
-			if (get_user(val, p))
-				return -EFAULT;
-			bta->channels    = (val > 1) ? 2 : 1;
-			bta->sampleshift = (bta->channels == 2) ? 0 : 1;
-			if (debug)
-				printk(KERN_DEBUG
-				       "btaudio: val=%d channels=%d\n",
-				       val,bta->channels);
-		}
-		/* fall through */
-        case SOUND_PCM_READ_CHANNELS:
-		return put_user(bta->channels, p);
-		
-        case SNDCTL_DSP_GETFMTS: /* Returns a mask */
-		if (bta->analog)
-			return put_user(AFMT_S16_LE|AFMT_S8, p);
-		else
-			return put_user(AFMT_S16_LE, p);
-
-        case SNDCTL_DSP_SETFMT: /* Selects ONE fmt*/
-		if (get_user(val, p))
-			return -EFAULT;
-                if (val != AFMT_QUERY) {
-			if (bta->analog)
-				bta->bits = (val == AFMT_S8) ? 8 : 16;
-			else
-				bta->bits = 16;
-			if (bta->recording) {
-				mutex_lock(&bta->lock);
-				stop_recording(bta);
-				start_recording(bta);
-				mutex_unlock(&bta->lock);
-			}
-		}
-		if (debug)
-			printk(KERN_DEBUG "btaudio: fmt: bits=%d\n",bta->bits);
-                return put_user((bta->bits==16) ? AFMT_S16_LE : AFMT_S8,
-				p);
-		break;
-        case SOUND_PCM_READ_BITS:
-		return put_user(bta->bits, p);
-
-        case SNDCTL_DSP_NONBLOCK:
-                file->f_flags |= O_NONBLOCK;
-                return 0;
-
-        case SNDCTL_DSP_RESET:
-		if (bta->recording) {
-			mutex_lock(&bta->lock);
-			stop_recording(bta);
-			mutex_unlock(&bta->lock);
-		}
-		return 0;
-        case SNDCTL_DSP_GETBLKSIZE:
-		if (!bta->recording) {
-			if (0 != (ret = alloc_buffer(bta)))
-				return ret;
-			if (0 != (ret = make_risc(bta)))
-				return ret;
-		}
-		return put_user(bta->block_bytes>>bta->sampleshift,p);
-
-        case SNDCTL_DSP_SYNC:
-		/* NOP */
-		return 0;
-	case SNDCTL_DSP_GETISPACE:
-	{
-		audio_buf_info info;
-		if (!bta->recording)
-			return -EINVAL;
-		info.fragsize = bta->block_bytes>>bta->sampleshift;
-		info.fragstotal = bta->block_count;
-		info.bytes = bta->read_count;
-		info.fragments = info.bytes / info.fragsize;
-		if (debug)
-			printk(KERN_DEBUG "btaudio: SNDCTL_DSP_GETISPACE "
-			       "returns %d/%d/%d/%d\n",
-			       info.fragsize, info.fragstotal,
-			       info.bytes, info.fragments);
-		if (copy_to_user(argp, &info, sizeof(info)))
-			return -EFAULT;
-		return 0;
-	}
-#if 0 /* TODO */
-        case SNDCTL_DSP_GETTRIGGER:
-        case SNDCTL_DSP_SETTRIGGER:
-        case SNDCTL_DSP_SETFRAGMENT:
-#endif
-	default:
-		return -EINVAL;
-	}
-}
-
-static unsigned int btaudio_dsp_poll(struct file *file, struct poll_table_struct *wait)
-{
-	struct btaudio *bta = file->private_data;
-	unsigned int mask = 0;
-
-	poll_wait(file, &bta->readq, wait);
-
-	if (0 != bta->read_count)
-		mask |= (POLLIN | POLLRDNORM);
-
-	return mask;
-}
-
-static const struct file_operations btaudio_digital_dsp_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.open		= btaudio_dsp_open_digital,
-	.release	= btaudio_dsp_release,
-	.read		= btaudio_dsp_read,
-	.write		= btaudio_dsp_write,
-	.ioctl		= btaudio_dsp_ioctl,
-	.poll		= btaudio_dsp_poll,
-};
-
-static const struct file_operations btaudio_analog_dsp_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.open		= btaudio_dsp_open_analog,
-	.release	= btaudio_dsp_release,
-	.read		= btaudio_dsp_read,
-	.write		= btaudio_dsp_write,
-	.ioctl		= btaudio_dsp_ioctl,
-	.poll		= btaudio_dsp_poll,
-};
-
-/* -------------------------------------------------------------- */
-
-static char *irq_name[] = { "", "", "", "OFLOW", "", "", "", "", "", "", "",
-			    "RISCI", "FBUS", "FTRGT", "FDSR", "PPERR",
-			    "RIPERR", "PABORT", "OCERR", "SCERR" };
-
-static irqreturn_t btaudio_irq(int irq, void *dev_id)
-{
-	int count = 0;
-	u32 stat,astat;
-	struct btaudio *bta = dev_id;
-	int handled = 0;
-
-	for (;;) {
-		count++;
-		stat  = btread(REG_INT_STAT);
-		astat = stat & btread(REG_INT_MASK);
-		if (!astat)
-			return IRQ_RETVAL(handled);
-		handled = 1;
-		btwrite(astat,REG_INT_STAT);
-
-		if (irq_debug) {
-			int i;
-			printk(KERN_DEBUG "btaudio: irq loop=%d risc=%x, bits:",
-			       count, stat>>28);
-			for (i = 0; i < (sizeof(irq_name)/sizeof(char*)); i++) {
-				if (stat & (1 << i))
-					printk(" %s",irq_name[i]);
-				if (astat & (1 << i))
-					printk("*");
-			}
-			printk("\n");
-		}
-		if (stat & IRQ_RISCI) {
-			int blocks;
-			blocks = (stat >> 28) - bta->dma_block;
-			if (blocks < 0)
-				blocks += bta->block_count;
-			bta->dma_block = stat >> 28;
-			if (bta->read_count + 2*bta->block_bytes > bta->buf_size) {
-				stop_recording(bta);
-				printk(KERN_INFO "btaudio: buffer overrun\n");
-			}
-			if (blocks > 0) {
-				bta->read_count += blocks * bta->block_bytes;
-				wake_up_interruptible(&bta->readq);
-			}
-		}
-		if (count > 10) {
-			printk(KERN_WARNING
-			       "btaudio: Oops - irq mask cleared\n");
-			btwrite(0, REG_INT_MASK);
-		}
-	}
-	return IRQ_NONE;
-}
-
-/* -------------------------------------------------------------- */
-
-static unsigned int dsp1 = -1;
-static unsigned int dsp2 = -1;
-static unsigned int mixer = -1;
-static int latency = -1;
-static int digital = 1;
-static int analog = 1;
-static int rate;
-
-#define BTA_OSPREY200 1
-
-static struct cardinfo cards[] = {
-	[0] = {
-		.name	= "default",
-		.rate	= 32000,
-	},
-	[BTA_OSPREY200] = {
-		.name	= "Osprey 200",
-		.rate	= 44100,
-	},
-};
-
-static int __devinit btaudio_probe(struct pci_dev *pci_dev,
-				   const struct pci_device_id *pci_id)
-{
-	struct btaudio *bta;
-	struct cardinfo *card = &cards[pci_id->driver_data];
-	unsigned char revision,lat;
-	int rc = -EBUSY;
-
-	if (pci_enable_device(pci_dev))
-		return -EIO;
-	if (!request_mem_region(pci_resource_start(pci_dev,0),
-				pci_resource_len(pci_dev,0),
-				"btaudio")) {
-		return -EBUSY;
-	}
-
-	bta = kzalloc(sizeof(*bta),GFP_ATOMIC);
-	if (!bta) {
-		rc = -ENOMEM;
-		goto fail0;
-	}
-
-	bta->pci  = pci_dev;
-	bta->irq  = pci_dev->irq;
-	bta->mem  = pci_resource_start(pci_dev,0);
-	bta->mmio = ioremap(pci_resource_start(pci_dev,0),
-			    pci_resource_len(pci_dev,0));
-
-	bta->source     = 1;
-	bta->bits       = 8;
-	bta->channels   = 1;
-	if (bta->analog) {
-		bta->decimation  = 15;
-	} else {
-		bta->decimation  = 0;
-		bta->sampleshift = 1;
-	}
-
-	/* sample rate */
-	bta->rate = card->rate;
-	if (rate)
-		bta->rate = rate;
-	
-	mutex_init(&bta->lock);
-        init_waitqueue_head(&bta->readq);
-
-	if (-1 != latency) {
-		printk(KERN_INFO "btaudio: setting pci latency timer to %d\n",
-		       latency);
-		pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, latency);
-	}
-        pci_read_config_byte(pci_dev, PCI_CLASS_REVISION, &revision);
-        pci_read_config_byte(pci_dev, PCI_LATENCY_TIMER, &lat);
-        printk(KERN_INFO "btaudio: Bt%x (rev %d) at %02x:%02x.%x, ",
-	       pci_dev->device,revision,pci_dev->bus->number,
-	       PCI_SLOT(pci_dev->devfn),PCI_FUNC(pci_dev->devfn));
-        printk("irq: %d, latency: %d, mmio: 0x%lx\n",
-	       bta->irq, lat, bta->mem);
-	printk("btaudio: using card config \"%s\"\n", card->name);
-
-	/* init hw */
-        btwrite(0, REG_GPIO_DMA_CTL);
-        btwrite(0, REG_INT_MASK);
-        btwrite(~0U, REG_INT_STAT);
-	pci_set_master(pci_dev);
-
-	if ((rc = request_irq(bta->irq, btaudio_irq, IRQF_SHARED|IRQF_DISABLED,
-			      "btaudio",(void *)bta)) < 0) {
-		printk(KERN_WARNING
-		       "btaudio: can't request irq (rc=%d)\n",rc);
-		goto fail1;
-	}
-
-	/* register devices */
-	if (digital) {
-		rc = bta->dsp_digital =
-			register_sound_dsp(&btaudio_digital_dsp_fops,dsp1);
-		if (rc < 0) {
-			printk(KERN_WARNING
-			       "btaudio: can't register digital dsp (rc=%d)\n",rc);
-			goto fail2;
-		}
-		printk(KERN_INFO "btaudio: registered device dsp%d [digital]\n",
-		       bta->dsp_digital >> 4);
-	}
-	if (analog) {
-		rc = bta->dsp_analog =
-			register_sound_dsp(&btaudio_analog_dsp_fops,dsp2);
-		if (rc < 0) {
-			printk(KERN_WARNING
-			       "btaudio: can't register analog dsp (rc=%d)\n",rc);
-			goto fail3;
-		}
-		printk(KERN_INFO "btaudio: registered device dsp%d [analog]\n",
-		       bta->dsp_analog >> 4);
-		rc = bta->mixer_dev = register_sound_mixer(&btaudio_mixer_fops,mixer);
-		if (rc < 0) {
-			printk(KERN_WARNING
-			       "btaudio: can't register mixer (rc=%d)\n",rc);
-			goto fail4;
-		}
-		printk(KERN_INFO "btaudio: registered device mixer%d\n",
-		       bta->mixer_dev >> 4);
-	}
-
-	/* hook into linked list */
-	bta->next = btaudios;
-	btaudios = bta;
-
-	pci_set_drvdata(pci_dev,bta);
-        return 0;
-
- fail4:
-	unregister_sound_dsp(bta->dsp_analog);
- fail3:
-	if (digital)
-		unregister_sound_dsp(bta->dsp_digital);
- fail2:
-        free_irq(bta->irq,bta);	
- fail1:
-	iounmap(bta->mmio);
-	kfree(bta);
- fail0:
-	release_mem_region(pci_resource_start(pci_dev,0),
-			   pci_resource_len(pci_dev,0));
-	return rc;
-}
-
-static void __devexit btaudio_remove(struct pci_dev *pci_dev)
-{
-	struct btaudio *bta = pci_get_drvdata(pci_dev);
-	struct btaudio *walk;
-
-	/* turn off all DMA / IRQs */
-        btand(~15, REG_GPIO_DMA_CTL);
-        btwrite(0, REG_INT_MASK);
-        btwrite(~0U, REG_INT_STAT);
-
-	/* unregister devices */
-	if (digital) {
-		unregister_sound_dsp(bta->dsp_digital);
-	}
-	if (analog) {
-		unregister_sound_dsp(bta->dsp_analog);
-		unregister_sound_mixer(bta->mixer_dev);
-	}
-
-	/* free resources */
-	free_buffer(bta);
-        free_irq(bta->irq,bta);
-	release_mem_region(pci_resource_start(pci_dev,0),
-			   pci_resource_len(pci_dev,0));
-	iounmap(bta->mmio);
-
-	/* remove from linked list */
-	if (bta == btaudios) {
-		btaudios = NULL;
-	} else {
-		for (walk = btaudios; walk->next != bta; walk = walk->next)
-			; /* if (NULL == walk->next) BUG(); */
-		walk->next = bta->next;
-	}
-
-	pci_set_drvdata(pci_dev, NULL);
-	kfree(bta);
-	return;
-}
-
-/* -------------------------------------------------------------- */
-
-static struct pci_device_id btaudio_pci_tbl[] = {
-        {
-		.vendor		= PCI_VENDOR_ID_BROOKTREE,
-		.device		= 0x0878,
-		.subvendor	= 0x0070,
-		.subdevice	= 0xff01,
-		.driver_data	= BTA_OSPREY200,
-	},{
-		.vendor		= PCI_VENDOR_ID_BROOKTREE,
-		.device		= 0x0878,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-	},{
-		.vendor		= PCI_VENDOR_ID_BROOKTREE,
-		.device		= 0x0878,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-        },{
-		/* --- end of list --- */
-	}
-};
-
-static struct pci_driver btaudio_pci_driver = {
-        .name		= "btaudio",
-        .id_table	= btaudio_pci_tbl,
-        .probe		= btaudio_probe,
-        .remove		=  __devexit_p(btaudio_remove),
-};
-
-static int btaudio_init_module(void)
-{
-	printk(KERN_INFO "btaudio: driver version 0.7 loaded [%s%s%s]\n",
-	       digital ? "digital" : "",
-	       analog && digital ? "+" : "",
-	       analog ? "analog" : "");
-	return pci_register_driver(&btaudio_pci_driver);
-}
-
-static void btaudio_cleanup_module(void)
-{
-	pci_unregister_driver(&btaudio_pci_driver);
-	return;
-}
-
-module_init(btaudio_init_module);
-module_exit(btaudio_cleanup_module);
-
-module_param(dsp1, int, S_IRUGO);
-module_param(dsp2, int, S_IRUGO);
-module_param(mixer, int, S_IRUGO);
-module_param(debug, int, S_IRUGO | S_IWUSR);
-module_param(irq_debug, int, S_IRUGO | S_IWUSR);
-module_param(digital, int, S_IRUGO);
-module_param(analog, int, S_IRUGO);
-module_param(rate, int, S_IRUGO);
-module_param(latency, int, S_IRUGO);
-MODULE_PARM_DESC(latency,"pci latency timer");
-
-MODULE_DEVICE_TABLE(pci, btaudio_pci_tbl);
-MODULE_DESCRIPTION("bt878 audio dma driver");
-MODULE_AUTHOR("Gerd Knorr");
-MODULE_LICENSE("GPL");
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/sound/oss/cs4232.c b/sound/oss/cs4232.c
deleted file mode 100644
index de40e21bf279..000000000000
--- a/sound/oss/cs4232.c
+++ /dev/null
@@ -1,526 +0,0 @@
-/*
- * Copyright (C) by Hannu Savolainen 1993-1997
- *
- *	cs4232.c
- *
- * The low level driver for Crystal CS4232 based cards. The CS4232 is
- * a PnP compatible chip which contains a CS4231A codec, SB emulation,
- * a MPU401 compatible MIDI port, joystick and synthesizer and IDE CD-ROM 
- * interfaces. This is just a temporary driver until full PnP support
- * gets implemented. Just the WSS codec, FM synth and the MIDI ports are
- * supported. Other interfaces are left uninitialized.
- *
- * ifdef ...WAVEFRONT...
- * 
- *   Support is provided for initializing the WaveFront synth
- *   interface as well, which is logical device #4. Note that if
- *   you have a Tropez+ card, you probably don't need to setup
- *   the CS4232-supported MIDI interface, since it corresponds to
- *   the internal 26-pin header that's hard to access. Using this
- *   requires an additional IRQ, a resource none too plentiful in
- *   this environment. Just don't set module parameters mpuio and
- *   mpuirq, and the MIDI port will be left uninitialized. You can
- *   still use the ICS2115 hosted MIDI interface which corresponds
- *   to the 9-pin D connector on the back of the card.
- *
- * endif  ...WAVEFRONT...
- *
- * Supported chips are:
- *      CS4232
- *      CS4236
- *      CS4236B
- *
- * Note: You will need a PnP config setup to initialise some CS4232 boards
- * anyway.
- *
- * Changes
- *      John Rood               Added Bose Sound System Support.
- *      Toshio Spoor
- *	Alan Cox		Modularisation, Basic cleanups.
- *      Paul Barton-Davis	Separated MPU configuration, added
- *                                       Tropez+ (WaveFront) support
- *	Christoph Hellwig	Adapted to module_init/module_exit,
- * 					simple cleanups
- * 	Arnaldo C. de Melo	got rid of attach_uart401
- *	Bartlomiej Zolnierkiewicz
- *				Added some __init/__initdata/__exit
- *	Marcus Meissner		Added ISA PnP support.
- */
-
-#include <linux/pnp.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include "sound_config.h"
-
-#include "ad1848.h"
-#include "mpu401.h"
-
-#define KEY_PORT	0x279	/* Same as LPT1 status port */
-#define CSN_NUM		0x99	/* Just a random number */
-#define INDEX_ADDRESS   0x00    /* (R0) Index Address Register */
-#define INDEX_DATA      0x01    /* (R1) Indexed Data Register */
-#define PIN_CONTROL     0x0a    /* (I10) Pin Control */
-#define ENABLE_PINS     0xc0    /* XCTRL0/XCTRL1 enable */
-
-static void CS_OUT(unsigned char a)
-{
-	outb(a, KEY_PORT);
-}
-
-#define CS_OUT2(a, b)		{CS_OUT(a);CS_OUT(b);}
-#define CS_OUT3(a, b, c)	{CS_OUT(a);CS_OUT(b);CS_OUT(c);}
-
-static int __initdata bss       = 0;
-static int mpu_base, mpu_irq;
-static int synth_base, synth_irq;
-static int mpu_detected;
-
-static int probe_cs4232_mpu(struct address_info *hw_config)
-{
-	/*
-	 *	Just write down the config values.
-	 */
-
-	mpu_base = hw_config->io_base;
-	mpu_irq = hw_config->irq;
-
-	return 1;
-}
-
-static unsigned char crystal_key[] =	/* A 32 byte magic key sequence */
-{
-	0x96, 0x35, 0x9a, 0xcd, 0xe6, 0xf3, 0x79, 0xbc,
-	0x5e, 0xaf, 0x57, 0x2b, 0x15, 0x8a, 0xc5, 0xe2,
-	0xf1, 0xf8, 0x7c, 0x3e, 0x9f, 0x4f, 0x27, 0x13,
-	0x09, 0x84, 0x42, 0xa1, 0xd0, 0x68, 0x34, 0x1a
-};
-
-static void sleep(unsigned howlong)
-{
-	current->state = TASK_INTERRUPTIBLE;
-	schedule_timeout(howlong);
-}
-
-static void enable_xctrl(int baseio)
-{
-        unsigned char regd;
-                
-        /*
-         * Some IBM Aptiva's have the Bose Sound System. By default
-         * the Bose Amplifier is disabled. The amplifier will be 
-         * activated, by setting the XCTRL0 and XCTRL1 bits.
-         * Volume of the monitor bose speakers/woofer, can then
-         * be set by changing the PCM volume.
-         *
-         */
-                
-        printk("cs4232: enabling Bose Sound System Amplifier.\n");
-        
-        /* Switch to Pin Control Address */                   
-        regd = inb(baseio + INDEX_ADDRESS) & 0xe0;
-        outb(((unsigned char) (PIN_CONTROL | regd)), baseio + INDEX_ADDRESS );
-        
-        /* Activate the XCTRL0 and XCTRL1 Pins */
-        regd = inb(baseio + INDEX_DATA);
-        outb(((unsigned char) (ENABLE_PINS | regd)), baseio + INDEX_DATA );
-}
-
-static int __init probe_cs4232(struct address_info *hw_config, int isapnp_configured)
-{
-	int i, n;
-	int base = hw_config->io_base, irq = hw_config->irq;
-	int dma1 = hw_config->dma, dma2 = hw_config->dma2;
-	struct resource *ports;
-
-	if (base == -1 || irq == -1 || dma1 == -1) {
-		printk(KERN_ERR "cs4232: dma, irq and io must be set.\n");
-		return 0;
-	}
-
-	/*
-	 * Verify that the I/O port range is free.
-	 */
-
-	ports = request_region(base, 4, "ad1848");
-	if (!ports) {
-		printk(KERN_ERR "cs4232.c: I/O port 0x%03x not free\n", base);
-		return 0;
-	}
-	if (ad1848_detect(ports, NULL, hw_config->osp)) {
-		goto got_it;	/* The card is already active */
-	}
-	if (isapnp_configured) {
-		printk(KERN_ERR "cs4232.c: ISA PnP configured, but not detected?\n");
-		goto fail;
-	}
-
-	/*
-	 * This version of the driver doesn't use the PnP method when configuring
-	 * the card but a simplified method defined by Crystal. This means that
-	 * just one CS4232 compatible device can exist on the system. Also this
-	 * method conflicts with possible PnP support in the OS. For this reason 
-	 * driver is just a temporary kludge.
-	 *
-	 * Also the Cirrus/Crystal method doesn't always work. Try ISA PnP first ;)
-	 */
-
-	/*
-	 * Repeat initialization few times since it doesn't always succeed in
-	 * first time.
-	 */
-
-	for (n = 0; n < 4; n++)
-	{	
-		/*
-		 *	Wake up the card by sending a 32 byte Crystal key to the key port.
-		 */
-		
-		for (i = 0; i < 32; i++)
-			CS_OUT(crystal_key[i]);
-
-		sleep(HZ / 10);
-
-		/*
-		 *	Now set the CSN (Card Select Number).
-		 */
-
-		CS_OUT2(0x06, CSN_NUM);
-
-		/*
-		 *	Then set some config bytes. First logical device 0 
-		 */
-
-		CS_OUT2(0x15, 0x00);	/* Select logical device 0 (WSS/SB/FM) */
-		CS_OUT3(0x47, (base >> 8) & 0xff, base & 0xff);	/* WSS base */
-
-		if (!request_region(0x388, 4, "FM"))	/* Not free */
-			CS_OUT3(0x48, 0x00, 0x00)	/* FM base off */
-		else {
-			release_region(0x388, 4);
-			CS_OUT3(0x48, 0x03, 0x88);	/* FM base 0x388 */
-		}
-
-		CS_OUT3(0x42, 0x00, 0x00);	/* SB base off */
-		CS_OUT2(0x22, irq);		/* SB+WSS IRQ */
-		CS_OUT2(0x2a, dma1);		/* SB+WSS DMA */
-
-		if (dma2 != -1)
-			CS_OUT2(0x25, dma2)	/* WSS DMA2 */
-		else
-			CS_OUT2(0x25, 4);	/* No WSS DMA2 */
-
-		CS_OUT2(0x33, 0x01);	/* Activate logical dev 0 */
-
-		sleep(HZ / 10);
-
-		/*
-		 * Initialize logical device 3 (MPU)
-		 */
-
-		if (mpu_base != 0 && mpu_irq != 0)
-		{
-			CS_OUT2(0x15, 0x03);	/* Select logical device 3 (MPU) */
-			CS_OUT3(0x47, (mpu_base >> 8) & 0xff, mpu_base & 0xff);	/* MPU base */
-			CS_OUT2(0x22, mpu_irq);	/* MPU IRQ */
-			CS_OUT2(0x33, 0x01);	/* Activate logical dev 3 */
-		}
-
-		if(synth_base != 0)
-		{
-		    CS_OUT2 (0x15, 0x04);	        /* logical device 4 (WaveFront) */
-		    CS_OUT3 (0x47, (synth_base >> 8) & 0xff,
-			     synth_base & 0xff);	/* base */
-		    CS_OUT2 (0x22, synth_irq);     	/* IRQ */
-		    CS_OUT2 (0x33, 0x01);	        /* Activate logical dev 4 */
-		}
-
-		/*
-		 * Finally activate the chip
-		 */
-		
-		CS_OUT(0x79);
-
-		sleep(HZ / 5);
-
-		/*
-		 * Then try to detect the codec part of the chip
-		 */
-
-		if (ad1848_detect(ports, NULL, hw_config->osp))
-			goto got_it;
-		
-		sleep(HZ);
-	}
-fail:
-	release_region(base, 4);
-	return 0;
-
-got_it:
-	if (dma2 == -1)
-		dma2 = dma1;
-
-	hw_config->slots[0] = ad1848_init("Crystal audio controller", ports,
-					  irq,
-					  dma1,		/* Playback DMA */
-					  dma2,		/* Capture DMA */
-					  0,
-					  hw_config->osp,
-					  THIS_MODULE);
-
-	if (hw_config->slots[0] != -1 &&
-		audio_devs[hw_config->slots[0]]->mixer_dev!=-1)
-	{	
-		/* Assume the mixer map is as suggested in the CS4232 databook */
-		AD1848_REROUTE(SOUND_MIXER_LINE1, SOUND_MIXER_LINE);
-		AD1848_REROUTE(SOUND_MIXER_LINE2, SOUND_MIXER_CD);
-		AD1848_REROUTE(SOUND_MIXER_LINE3, SOUND_MIXER_SYNTH);		/* FM synth */
-	}
-	if (mpu_base != 0 && mpu_irq != 0)
-	{
-		static struct address_info hw_config2 = {
-			0
-		};		/* Ensure it's initialized */
-
-		hw_config2.io_base = mpu_base;
-		hw_config2.irq = mpu_irq;
-		hw_config2.dma = -1;
-		hw_config2.dma2 = -1;
-		hw_config2.always_detect = 0;
-		hw_config2.name = NULL;
-		hw_config2.driver_use_1 = 0;
-		hw_config2.driver_use_2 = 0;
-		hw_config2.card_subtype = 0;
-
-		if (probe_uart401(&hw_config2, THIS_MODULE))
-		{
-			mpu_detected = 1;
-		}
-		else
-		{
-			mpu_base = mpu_irq = 0;
-		}
-		hw_config->slots[1] = hw_config2.slots[1];
-	}
-	
-	if (bss)
-        	enable_xctrl(base);
-
-	return 1;
-}
-
-static void __devexit unload_cs4232(struct address_info *hw_config)
-{
-	int base = hw_config->io_base, irq = hw_config->irq;
-	int dma1 = hw_config->dma, dma2 = hw_config->dma2;
-
-	if (dma2 == -1)
-		dma2 = dma1;
-
-	ad1848_unload(base,
-		      irq,
-		      dma1,	/* Playback DMA */
-		      dma2,	/* Capture DMA */
-		      0);
-
-	sound_unload_audiodev(hw_config->slots[0]);
-	if (mpu_base != 0 && mpu_irq != 0 && mpu_detected)
-	{
-		static struct address_info hw_config2 =
-		{
-			0
-		};		/* Ensure it's initialized */
-
-		hw_config2.io_base = mpu_base;
-		hw_config2.irq = mpu_irq;
-		hw_config2.dma = -1;
-		hw_config2.dma2 = -1;
-		hw_config2.always_detect = 0;
-		hw_config2.name = NULL;
-		hw_config2.driver_use_1 = 0;
-		hw_config2.driver_use_2 = 0;
-		hw_config2.card_subtype = 0;
-		hw_config2.slots[1] = hw_config->slots[1];
-
-		unload_uart401(&hw_config2);
-	}
-}
-
-static struct address_info cfg;
-static struct address_info cfg_mpu;
-
-static int __initdata io	= -1;
-static int __initdata irq	= -1;
-static int __initdata dma	= -1;
-static int __initdata dma2	= -1;
-static int __initdata mpuio	= -1;
-static int __initdata mpuirq	= -1;
-static int __initdata synthio	= -1;
-static int __initdata synthirq	= -1;
-static int __initdata isapnp	= 1;
-
-static unsigned int cs4232_devices;
-
-MODULE_DESCRIPTION("CS4232 based soundcard driver"); 
-MODULE_AUTHOR("Hannu Savolainen, Paul Barton-Davis"); 
-MODULE_LICENSE("GPL");
-
-module_param(io, int, 0);
-MODULE_PARM_DESC(io,"base I/O port for AD1848");
-module_param(irq, int, 0);
-MODULE_PARM_DESC(irq,"IRQ for AD1848 chip");
-module_param(dma, int, 0);
-MODULE_PARM_DESC(dma,"8 bit DMA for AD1848 chip");
-module_param(dma2, int, 0);
-MODULE_PARM_DESC(dma2,"16 bit DMA for AD1848 chip");
-module_param(mpuio, int, 0);
-MODULE_PARM_DESC(mpuio,"MPU 401 base address");
-module_param(mpuirq, int, 0);
-MODULE_PARM_DESC(mpuirq,"MPU 401 IRQ");
-module_param(synthio, int, 0);
-MODULE_PARM_DESC(synthio,"Maui WaveTable base I/O port");
-module_param(synthirq, int, 0);
-MODULE_PARM_DESC(synthirq,"Maui WaveTable IRQ");
-module_param(isapnp, bool, 0);
-MODULE_PARM_DESC(isapnp,"Enable ISAPnP probing (default 1)");
-module_param(bss, bool, 0);
-MODULE_PARM_DESC(bss,"Enable Bose Sound System Support (default 0)");
-
-/*
- *	Install a CS4232 based card. Need to have ad1848 and mpu401
- *	loaded ready.
- */
-
-/* All cs4232 based cards have the main ad1848 card either as CSC0000 or
- * CSC0100. */
-static const struct pnp_device_id cs4232_pnp_table[] = {
-	{ .id = "CSC0100", .driver_data = 0 },
-	{ .id = "CSC0000", .driver_data = 0 },
-	/* Guillemot Turtlebeach something appears to be cs4232 compatible
-	 * (untested) */
-	{ .id = "GIM0100", .driver_data = 0 },
-	{ .id = ""}
-};
-
-MODULE_DEVICE_TABLE(pnp, cs4232_pnp_table);
-
-static int __init cs4232_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
-{
-	struct address_info *isapnpcfg;
-
-	isapnpcfg = kmalloc(sizeof(*isapnpcfg),GFP_KERNEL);
-	if (!isapnpcfg)
-		return -ENOMEM;
-
-	isapnpcfg->irq		= pnp_irq(dev, 0);
-	isapnpcfg->dma		= pnp_dma(dev, 0);
-	isapnpcfg->dma2		= pnp_dma(dev, 1);
-	isapnpcfg->io_base	= pnp_port_start(dev, 0);
-	if (probe_cs4232(isapnpcfg,TRUE) == 0) {
-		printk(KERN_ERR "cs4232: ISA PnP card found, but not detected?\n");
-		kfree(isapnpcfg);
-		return -ENODEV;
-	}
-	pnp_set_drvdata(dev,isapnpcfg);
-	cs4232_devices++;
-	return 0;
-}
-
-static void __devexit cs4232_pnp_remove(struct pnp_dev *dev)
-{
-	struct address_info *cfg = pnp_get_drvdata(dev);
-	if (cfg) {
-		unload_cs4232(cfg);
-		kfree(cfg);
-	}
-}
-
-static struct pnp_driver cs4232_driver = {
-	.name		= "cs4232",
-	.id_table	= cs4232_pnp_table,
-	.probe		= cs4232_pnp_probe,
-	.remove		= __devexit_p(cs4232_pnp_remove),
-};
-
-static int __init init_cs4232(void)
-{
-#ifdef CONFIG_SOUND_WAVEFRONT_MODULE
-	if(synthio == -1)
-		printk(KERN_INFO "cs4232: set synthio and synthirq to use the wavefront facilities.\n");
-	else {
-		synth_base = synthio;
-		synth_irq =  synthirq;
-	}
-#else
-	if(synthio != -1)
-		printk(KERN_WARNING "cs4232: wavefront support not enabled in this driver.\n");
-#endif
-	cfg.irq = -1;
-
-	if (isapnp) {
-		pnp_register_driver(&cs4232_driver);
-		if (cs4232_devices)
-			return 0;
-	}
-
-	if(io==-1||irq==-1||dma==-1)
-	{
-		printk(KERN_ERR "cs4232: Must set io, irq and dma.\n");
-		return -ENODEV;
-	}
-
-	cfg.io_base = io;
-	cfg.irq = irq;
-	cfg.dma = dma;
-	cfg.dma2 = dma2;
-
-	cfg_mpu.io_base = -1;
-	cfg_mpu.irq = -1;
-
-	if (mpuio != -1 && mpuirq != -1) {
-		cfg_mpu.io_base = mpuio;
-		cfg_mpu.irq = mpuirq;
-		probe_cs4232_mpu(&cfg_mpu); /* Bug always returns 0 not OK -- AC */
-	}
-
-	if (probe_cs4232(&cfg,FALSE) == 0)
-		return -ENODEV;
-
-	return 0;
-}
-
-static void __exit cleanup_cs4232(void)
-{
-	pnp_unregister_driver(&cs4232_driver);
-        if (cfg.irq != -1)
-		unload_cs4232(&cfg); /* Unloads global MPU as well, if needed */
-}
-
-module_init(init_cs4232);
-module_exit(cleanup_cs4232);
-
-#ifndef MODULE
-static int __init setup_cs4232(char *str)
-{
-	/* io, irq, dma, dma2 mpuio, mpuirq*/
-	int ints[7];
-
-	/* If we have isapnp cards, no need for options */
-	pnp_register_driver(&cs4232_driver);
-	if (cs4232_devices)
-		return 1;
-	
-	str = get_options(str, ARRAY_SIZE(ints), ints);
-	
-	io	= ints[1];
-	irq	= ints[2];
-	dma	= ints[3];
-	dma2	= ints[4];
-	mpuio	= ints[5];
-	mpuirq	= ints[6];
-
-	return 1;
-}
-
-__setup("cs4232=", setup_cs4232);
-#endif
diff --git a/sound/oss/i810_audio.c b/sound/oss/i810_audio.c
deleted file mode 100644
index f5e31f11973d..000000000000
--- a/sound/oss/i810_audio.c
+++ /dev/null
@@ -1,3656 +0,0 @@
-/*
- *	Intel i810 and friends ICH driver for Linux
- *	Alan Cox <alan@redhat.com>
- *
- *  Built from:
- *	Low level code:  Zach Brown (original nonworking i810 OSS driver)
- *			 Jaroslav Kysela <perex@suse.cz> (working ALSA driver)
- *
- *	Framework: Thomas Sailer <sailer@ife.ee.ethz.ch>
- *	Extended by: Zach Brown <zab@redhat.com>  
- *			and others..
- *
- *  Hardware Provided By:
- *	Analog Devices (A major AC97 codec maker)
- *	Intel Corp  (you've probably heard of them already)
- *
- *  AC97 clues and assistance provided by
- *	Analog Devices
- *	Zach 'Fufu' Brown
- *	Jeff Garzik
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License as published by
- *	the Free Software Foundation; either version 2 of the License, or
- *	(at your option) any later version.
- *
- *	This program is distributed in the hope that it will be useful,
- *	but WITHOUT ANY WARRANTY; without even the implied warranty of
- *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *	GNU General Public License for more details.
- *
- *	You should have received a copy of the GNU General Public License
- *	along with this program; if not, write to the Free Software
- *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- *
- *	Intel 810 theory of operation
- *
- *	The chipset provides three DMA channels that talk to an AC97
- *	CODEC (AC97 is a digital/analog mixer standard). At its simplest
- *	you get 48Khz audio with basic volume and mixer controls. At the
- *	best you get rate adaption in the codec. We set the card up so
- *	that we never take completion interrupts but instead keep the card
- *	chasing its tail around a ring buffer. This is needed for mmap
- *	mode audio and happens to work rather well for non-mmap modes too.
- *
- *	The board has one output channel for PCM audio (supported) and
- *	a stereo line in and mono microphone input. Again these are normally
- *	locked to 48Khz only. Right now recording is not finished.
- *
- *	There is no midi support, no synth support. Use timidity. To get
- *	esd working you need to use esd -r 48000 as it won't probe 48KHz
- *	by default. mpg123 can't handle 48Khz only audio so use xmms.
- *
- *	Fix The Sound On Dell
- *
- *	Not everyone uses 48KHz. We know of no way to detect this reliably
- *	and certainly not to get the right data. If your i810 audio sounds
- *	stupid you may need to investigate other speeds. According to Analog
- *	they tend to use a 14.318MHz clock which gives you a base rate of
- *	41194Hz.
- *
- *	This is available via the 'ftsodell=1' option. 
- *
- *	If you need to force a specific rate set the clocking= option
- *
- *	This driver is cursed. (Ben LaHaise)
- *
- *  ICH 3 caveats
- *	Intel errata #7 for ICH3 IO. We need to disable SMI stuff
- *	when codec probing. [Not Yet Done]
- *
- *  ICH 4 caveats
- *
- *	The ICH4 has the feature, that the codec ID doesn't have to be 
- *	congruent with the IO connection.
- * 
- *	Therefore, from driver version 0.23 on, there is a "codec ID" <->
- *	"IO register base offset" mapping (card->ac97_id_map) field.
- *   
- *	Juergen "George" Sawinski (jsaw) 
- */
- 
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/ioport.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/sound.h>
-#include <linux/slab.h>
-#include <linux/soundcard.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
-#include <linux/ac97_codec.h>
-#include <linux/bitops.h>
-#include <linux/mutex.h>
-#include <linux/mm.h>
-
-#include <asm/uaccess.h>
-
-#define DRIVER_VERSION "1.01"
-
-#define MODULOP2(a, b) ((a) & ((b) - 1))
-#define MASKP2(a, b) ((a) & ~((b) - 1))
-
-static int ftsodell;
-static int strict_clocking;
-static unsigned int clocking;
-static int spdif_locked;
-static int ac97_quirk = AC97_TUNE_DEFAULT;
-
-//#define DEBUG
-//#define DEBUG2
-//#define DEBUG_INTERRUPTS
-//#define DEBUG_MMAP
-//#define DEBUG_MMIO
-
-#define ADC_RUNNING	1
-#define DAC_RUNNING	2
-
-#define I810_FMT_16BIT	1
-#define I810_FMT_STEREO	2
-#define I810_FMT_MASK	3
-
-#define SPDIF_ON	0x0004
-#define SURR_ON		0x0010
-#define CENTER_LFE_ON	0x0020
-#define VOL_MUTED	0x8000
-
-/* the 810's array of pointers to data buffers */
-
-struct sg_item {
-#define BUSADDR_MASK	0xFFFFFFFE
-	u32 busaddr;	
-#define CON_IOC 	0x80000000 /* interrupt on completion */
-#define CON_BUFPAD	0x40000000 /* pad underrun with last sample, else 0 */
-#define CON_BUFLEN_MASK	0x0000ffff /* buffer length in samples */
-	u32 control;
-};
-
-/* an instance of the i810 channel */
-#define SG_LEN 32
-struct i810_channel 
-{
-	/* these sg guys should probably be allocated
-	   separately as nocache. Must be 8 byte aligned */
-	struct sg_item sg[SG_LEN];	/* 32*8 */
-	u32 offset;			/* 4 */
-	u32 port;			/* 4 */
-	u32 used;
-	u32 num;
-};
-
-/*
- * we have 3 separate dma engines.  pcm in, pcm out, and mic.
- * each dma engine has controlling registers.  These goofy
- * names are from the datasheet, but make it easy to write
- * code while leafing through it.
- *
- * ICH4 has 6 dma engines, pcm in, pcm out, mic, pcm in 2, 
- * mic in 2, s/pdif.   Of special interest is the fact that
- * the upper 3 DMA engines on the ICH4 *must* be accessed
- * via mmio access instead of pio access.
- */
-
-#define ENUM_ENGINE(PRE,DIG) 									\
-enum {												\
-	PRE##_BASE =	0x##DIG##0,		/* Base Address */				\
-	PRE##_BDBAR =	0x##DIG##0,		/* Buffer Descriptor list Base Address */	\
-	PRE##_CIV =	0x##DIG##4,		/* Current Index Value */			\
-	PRE##_LVI =	0x##DIG##5,		/* Last Valid Index */				\
-	PRE##_SR =	0x##DIG##6,		/* Status Register */				\
-	PRE##_PICB =	0x##DIG##8,		/* Position In Current Buffer */		\
-	PRE##_PIV =	0x##DIG##a,		/* Prefetched Index Value */			\
-	PRE##_CR =	0x##DIG##b		/* Control Register */				\
-}
-
-ENUM_ENGINE(OFF,0);	/* Offsets */
-ENUM_ENGINE(PI,0);	/* PCM In */
-ENUM_ENGINE(PO,1);	/* PCM Out */
-ENUM_ENGINE(MC,2);	/* Mic In */
-
-enum {
-	GLOB_CNT =	0x2c,			/* Global Control */
-	GLOB_STA = 	0x30,			/* Global Status */
-	CAS	 = 	0x34			/* Codec Write Semaphore Register */
-};
-
-ENUM_ENGINE(MC2,4);     /* Mic In 2 */
-ENUM_ENGINE(PI2,5);     /* PCM In 2 */
-ENUM_ENGINE(SP,6);      /* S/PDIF */
-
-enum {
-	SDM =           0x80                    /* SDATA_IN Map Register */
-};
-
-/* interrupts for a dma engine */
-#define DMA_INT_FIFO		(1<<4)  /* fifo under/over flow */
-#define DMA_INT_COMPLETE	(1<<3)  /* buffer read/write complete and ioc set */
-#define DMA_INT_LVI		(1<<2)  /* last valid done */
-#define DMA_INT_CELV		(1<<1)  /* last valid is current */
-#define DMA_INT_DCH		(1)	/* DMA Controller Halted (happens on LVI interrupts) */
-#define DMA_INT_MASK (DMA_INT_FIFO|DMA_INT_COMPLETE|DMA_INT_LVI)
-
-/* interrupts for the whole chip */
-#define INT_SEC		(1<<11)
-#define INT_PRI		(1<<10)
-#define INT_MC		(1<<7)
-#define INT_PO		(1<<6)
-#define INT_PI		(1<<5)
-#define INT_MO		(1<<2)
-#define INT_NI		(1<<1)
-#define INT_GPI		(1<<0)
-#define INT_MASK (INT_SEC|INT_PRI|INT_MC|INT_PO|INT_PI|INT_MO|INT_NI|INT_GPI)
-
-/* magic numbers to protect our data structures */
-#define I810_CARD_MAGIC		0x5072696E /* "Prin" */
-#define I810_STATE_MAGIC	0x63657373 /* "cess" */
-#define I810_DMA_MASK		0xffffffff /* DMA buffer mask for pci_alloc_consist */
-#define NR_HW_CH		3
-
-/* maxinum number of AC97 codecs connected, AC97 2.0 defined 4 */
-#define NR_AC97                 4
-
-/* Please note that an 8bit mono stream is not valid on this card, you must have a 16bit */
-/* stream at a minimum for this card to be happy */
-static const unsigned sample_size[] = { 1, 2, 2, 4 };
-/* Samples are 16bit values, so we are shifting to a word, not to a byte, hence shift */
-/* values are one less than might be expected */
-static const unsigned sample_shift[] = { -1, 0, 0, 1 };
-
-enum {
-	ICH82801AA = 0,
-	ICH82901AB,
-	INTEL440MX,
-	INTELICH2,
-	INTELICH3,
-	INTELICH4,
-	INTELICH5,
-	SI7012,
-	NVIDIA_NFORCE,
-	AMD768,
-	AMD8111
-};
-
-static char * card_names[] = {
-	"Intel ICH 82801AA",
-	"Intel ICH 82901AB",
-	"Intel 440MX",
-	"Intel ICH2",
-	"Intel ICH3",
-	"Intel ICH4",
-	"Intel ICH5",
-	"SiS 7012",
-	"NVIDIA nForce Audio",
-	"AMD 768",
-	"AMD-8111 IOHub"
-};
-
-/* These are capabilities (and bugs) the chipsets _can_ have */
-static struct {
-	int16_t      nr_ac97;
-#define CAP_MMIO                 0x0001
-#define CAP_20BIT_AUDIO_SUPPORT  0x0002
-	u_int16_t flags;
-} card_cap[] = {
-	{  1, 0x0000 }, /* ICH82801AA */
-	{  1, 0x0000 }, /* ICH82901AB */
-	{  1, 0x0000 }, /* INTEL440MX */
-	{  1, 0x0000 }, /* INTELICH2 */
-	{  2, 0x0000 }, /* INTELICH3 */
- 	{  3, 0x0003 }, /* INTELICH4 */
-	{  3, 0x0003 }, /* INTELICH5 */
-	/*@FIXME to be verified*/	{  2, 0x0000 }, /* SI7012 */
-	/*@FIXME to be verified*/	{  2, 0x0000 }, /* NVIDIA_NFORCE */
-	/*@FIXME to be verified*/	{  2, 0x0000 }, /* AMD768 */
-	/*@FIXME to be verified*/	{  3, 0x0001 }, /* AMD8111 */
-};
-
-static struct pci_device_id i810_pci_tbl [] = {
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_5,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, ICH82801AA},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_5,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, ICH82901AB},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_440MX,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, INTEL440MX},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_4,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, INTELICH2},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_5,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, INTELICH3},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_5,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, INTELICH4},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_5,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, INTELICH5},
-	{PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_7012,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, SI7012},
-	{PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_MCP1_AUDIO,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, NVIDIA_NFORCE},
-	{PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_MCP2_AUDIO,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, NVIDIA_NFORCE},
-	{PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_MCP3_AUDIO,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, NVIDIA_NFORCE},
-	{PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_OPUS_7445,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD768},
-	{PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_AUDIO,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD8111},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_5,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, INTELICH4},
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_18,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, INTELICH4},
-	{PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_AUDIO,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, NVIDIA_NFORCE},
-	{0,}
-};
-
-MODULE_DEVICE_TABLE (pci, i810_pci_tbl);
-
-#ifdef CONFIG_PM
-#define PM_SUSPENDED(card) (card->pm_suspended)
-#else
-#define PM_SUSPENDED(card) (0)
-#endif
-
-/* "software" or virtual channel, an instance of opened /dev/dsp */
-struct i810_state {
-	unsigned int magic;
-	struct i810_card *card;	/* Card info */
-
-	/* single open lock mechanism, only used for recording */
-	struct mutex open_mutex;
-	wait_queue_head_t open_wait;
-
-	/* file mode */
-	mode_t open_mode;
-
-	/* virtual channel number */
-	int virt;
-
-#ifdef CONFIG_PM
-	unsigned int pm_saved_dac_rate,pm_saved_adc_rate;
-#endif
-	struct dmabuf {
-		/* wave sample stuff */
-		unsigned int rate;
-		unsigned char fmt, enable, trigger;
-
-		/* hardware channel */
-		struct i810_channel *read_channel;
-		struct i810_channel *write_channel;
-
-		/* OSS buffer management stuff */
-		void *rawbuf;
-		dma_addr_t dma_handle;
-		unsigned buforder;
-		unsigned numfrag;
-		unsigned fragshift;
-
-		/* our buffer acts like a circular ring */
-		unsigned hwptr;		/* where dma last started, updated by update_ptr */
-		unsigned swptr;		/* where driver last clear/filled, updated by read/write */
-		int count;		/* bytes to be consumed or been generated by dma machine */
-		unsigned total_bytes;	/* total bytes dmaed by hardware */
-
-		unsigned error;		/* number of over/underruns */
-		wait_queue_head_t wait;	/* put process on wait queue when no more space in buffer */
-
-		/* redundant, but makes calculations easier */
-		/* what the hardware uses */
-		unsigned dmasize;
-		unsigned fragsize;
-		unsigned fragsamples;
-
-		/* what we tell the user to expect */
-		unsigned userfrags;
-		unsigned userfragsize;
-
-		/* OSS stuff */
-		unsigned mapped:1;
-		unsigned ready:1;
-		unsigned update_flag;
-		unsigned ossfragsize;
-		unsigned ossmaxfrags;
-		unsigned subdivision;
-	} dmabuf;
-};
-
-
-struct i810_card {
-	unsigned int magic;
-
-	/* We keep i810 cards in a linked list */
-	struct i810_card *next;
-
-	/* The i810 has a certain amount of cross channel interaction
-	   so we use a single per card lock */
-	spinlock_t lock;
-	
-	/* Control AC97 access serialization */
-	spinlock_t ac97_lock;
-
-	/* PCI device stuff */
-	struct pci_dev * pci_dev;
-	u16 pci_id;
-	u16 pci_id_internal; /* used to access card_cap[] */
-#ifdef CONFIG_PM	
-	u16 pm_suspended;
-	int pm_saved_mixer_settings[SOUND_MIXER_NRDEVICES][NR_AC97];
-#endif
-	/* soundcore stuff */
-	int dev_audio;
-
-	/* structures for abstraction of hardware facilities, codecs, banks and channels*/
-	u16    ac97_id_map[NR_AC97];
-	struct ac97_codec *ac97_codec[NR_AC97];
-	struct i810_state *states[NR_HW_CH];
-	struct i810_channel *channel;	/* 1:1 to states[] but diff. lifetime */
-	dma_addr_t chandma;
-
-	u16 ac97_features;
-	u16 ac97_status;
-	u16 channels;
-	
-	/* hardware resources */
-	unsigned long ac97base;
-	unsigned long iobase;
-	u32 irq;
-
-	unsigned long ac97base_mmio_phys;
-	unsigned long iobase_mmio_phys;
-	u_int8_t __iomem *ac97base_mmio;
-	u_int8_t __iomem *iobase_mmio;
-
-	int           use_mmio;
-	
-	/* Function support */
-	struct i810_channel *(*alloc_pcm_channel)(struct i810_card *);
-	struct i810_channel *(*alloc_rec_pcm_channel)(struct i810_card *);
-	struct i810_channel *(*alloc_rec_mic_channel)(struct i810_card *);
-	void (*free_pcm_channel)(struct i810_card *, int chan);
-
-	/* We have a *very* long init time possibly, so use this to block */
-	/* attempts to open our devices before we are ready (stops oops'es) */
-	int initializing;
-};
-
-/* extract register offset from codec struct */
-#define IO_REG_OFF(codec) (((struct i810_card *) codec->private_data)->ac97_id_map[codec->id])
-
-#define I810_IOREAD(size, type, card, off)				\
-({									\
-	type val;							\
-	if (card->use_mmio)						\
-		val=read##size(card->iobase_mmio+off);			\
-	else								\
-		val=in##size(card->iobase+off);				\
-	val;								\
-})
-
-#define I810_IOREADL(card, off)		I810_IOREAD(l, u32, card, off)
-#define I810_IOREADW(card, off)		I810_IOREAD(w, u16, card, off)
-#define I810_IOREADB(card, off)		I810_IOREAD(b, u8,  card, off)
-
-#define I810_IOWRITE(size, val, card, off)				\
-({									\
-	if (card->use_mmio)						\
-		write##size(val, card->iobase_mmio+off);		\
-	else								\
-		out##size(val, card->iobase+off);			\
-})
-
-#define I810_IOWRITEL(val, card, off)	I810_IOWRITE(l, val, card, off)
-#define I810_IOWRITEW(val, card, off)	I810_IOWRITE(w, val, card, off)
-#define I810_IOWRITEB(val, card, off)	I810_IOWRITE(b, val, card, off)
-
-#define GET_CIV(card, port) MODULOP2(I810_IOREADB((card), (port) + OFF_CIV), SG_LEN)
-#define GET_LVI(card, port) MODULOP2(I810_IOREADB((card), (port) + OFF_LVI), SG_LEN)
-
-/* set LVI from CIV */
-#define CIV_TO_LVI(card, port, off) \
-	I810_IOWRITEB(MODULOP2(GET_CIV((card), (port)) + (off), SG_LEN), (card), (port) + OFF_LVI)
-
-static struct ac97_quirk ac97_quirks[] __devinitdata = {
-	{
-		.vendor = 0x0e11,
-		.device = 0x00b8,
-		.name = "Compaq Evo D510C",
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x1028,
-		.device = 0x00d8,
-		.name = "Dell Precision 530",   /* AD1885 */
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x1028,
-		.device = 0x0126,
-		.name = "Dell Optiplex GX260",  /* AD1981A */
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x1028,
-		.device = 0x012d,
-		.name = "Dell Precision 450",   /* AD1981B*/
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{       /* FIXME: which codec? */
-		.vendor = 0x103c,
-		.device = 0x00c3,
-		.name = "Hewlett-Packard onboard",
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x103c,
-		.device = 0x12f1,
-		.name = "HP xw8200",    /* AD1981B*/
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x103c,
-		.device = 0x3008,
-		.name = "HP xw4200",    /* AD1981B*/
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x10f1,
-		.device = 0x2665,
-		.name = "Fujitsu-Siemens Celsius",      /* AD1981? */
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x10f1,
-		.device = 0x2885,
-		.name = "AMD64 Mobo",   /* ALC650 */
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x110a,
-		.device = 0x0056,
-		.name = "Fujitsu-Siemens Scenic",       /* AD1981? */
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x11d4,
-		.device = 0x5375,
-		.name = "ADI AD1985 (discrete)",
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x1462,
-		.device = 0x5470,
-		.name = "MSI P4 ATX 645 Ultra",
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x1734,
-		.device = 0x0088,
-		.name = "Fujitsu-Siemens D1522",	/* AD1981 */
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x8086,
-		.device = 0x4856,
-		.name = "Intel D845WN (82801BA)",
-		.type = AC97_TUNE_SWAP_HP
-	},
-	{
-		.vendor = 0x8086,
-		.device = 0x4d44,
-		.name = "Intel D850EMV2",       /* AD1885 */
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x8086,
-		.device = 0x4d56,
-		.name = "Intel ICH/AD1885",
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x1028,
-		.device = 0x012d,
-		.name = "Dell Precision 450",   /* AD1981B*/
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x103c,
-		.device = 0x3008,
-		.name = "HP xw4200",    /* AD1981B*/
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{
-		.vendor = 0x103c,
-		.device = 0x12f1,
-		.name = "HP xw8200",    /* AD1981B*/
-		.type = AC97_TUNE_HP_ONLY
-	},
-	{ } /* terminator */
-};
-
-static struct i810_card *devs = NULL;
-
-static int i810_open_mixdev(struct inode *inode, struct file *file);
-static int i810_ioctl_mixdev(struct inode *inode, struct file *file,
-			     unsigned int cmd, unsigned long arg);
-static u16 i810_ac97_get(struct ac97_codec *dev, u8 reg);
-static void i810_ac97_set(struct ac97_codec *dev, u8 reg, u16 data);
-static u16 i810_ac97_get_mmio(struct ac97_codec *dev, u8 reg);
-static void i810_ac97_set_mmio(struct ac97_codec *dev, u8 reg, u16 data);
-static u16 i810_ac97_get_io(struct ac97_codec *dev, u8 reg);
-static void i810_ac97_set_io(struct ac97_codec *dev, u8 reg, u16 data);
-
-static struct i810_channel *i810_alloc_pcm_channel(struct i810_card *card)
-{
-	if(card->channel[1].used==1)
-		return NULL;
-	card->channel[1].used=1;
-	return &card->channel[1];
-}
-
-static struct i810_channel *i810_alloc_rec_pcm_channel(struct i810_card *card)
-{
-	if(card->channel[0].used==1)
-		return NULL;
-	card->channel[0].used=1;
-	return &card->channel[0];
-}
-
-static struct i810_channel *i810_alloc_rec_mic_channel(struct i810_card *card)
-{
-	if(card->channel[2].used==1)
-		return NULL;
-	card->channel[2].used=1;
-	return &card->channel[2];
-}
-
-static void i810_free_pcm_channel(struct i810_card *card, int channel)
-{
-	card->channel[channel].used=0;
-}
-
-static int i810_valid_spdif_rate ( struct ac97_codec *codec, int rate )
-{
-	unsigned long id = 0L;
-
-	id = (i810_ac97_get(codec, AC97_VENDOR_ID1) << 16);
-	id |= i810_ac97_get(codec, AC97_VENDOR_ID2) & 0xffff;
-#ifdef DEBUG
-	printk ( "i810_audio: codec = %s, codec_id = 0x%08lx\n", codec->name, id);
-#endif
-	switch ( id ) {
-		case 0x41445361: /* AD1886 */
-			if (rate == 48000) {
-				return 1;
-			}
-			break;
-		default: /* all other codecs, until we know otherwiae */
-			if (rate == 48000 || rate == 44100 || rate == 32000) {
-				return 1;
-			}
-			break;
-	}
-	return (0);
-}
-
-/* i810_set_spdif_output
- * 
- *  Configure the S/PDIF output transmitter. When we turn on
- *  S/PDIF, we turn off the analog output. This may not be
- *  the right thing to do.
- *
- *  Assumptions:
- *     The DSP sample rate must already be set to a supported
- *     S/PDIF rate (32kHz, 44.1kHz, or 48kHz) or we abort.
- */
-static int i810_set_spdif_output(struct i810_state *state, int slots, int rate)
-{
-	int	vol;
-	int	aud_reg;
-	int	r = 0;
-	struct ac97_codec *codec = state->card->ac97_codec[0];
-
-	if(!codec->codec_ops->digital) {
-		state->card->ac97_status &= ~SPDIF_ON;
-	} else {
-		if ( slots == -1 ) { /* Turn off S/PDIF */
-			codec->codec_ops->digital(codec, 0, 0, 0);
-			/* If the volume wasn't muted before we turned on S/PDIF, unmute it */
-			if ( !(state->card->ac97_status & VOL_MUTED) ) {
-				aud_reg = i810_ac97_get(codec, AC97_MASTER_VOL_STEREO);
-				i810_ac97_set(codec, AC97_MASTER_VOL_STEREO, (aud_reg & ~VOL_MUTED));
-			}
-			state->card->ac97_status &= ~(VOL_MUTED | SPDIF_ON);
-			return 0;
-		}
-
-		vol = i810_ac97_get(codec, AC97_MASTER_VOL_STEREO);
-		state->card->ac97_status = vol & VOL_MUTED;
-		
-		r = codec->codec_ops->digital(codec, slots, rate, 0);
-
-		if(r)
-			state->card->ac97_status |= SPDIF_ON;
-		else
-			state->card->ac97_status &= ~SPDIF_ON;
-
-		/* Mute the analog output */
-		/* Should this only mute the PCM volume??? */
-		i810_ac97_set(codec, AC97_MASTER_VOL_STEREO, (vol | VOL_MUTED));
-	}
-	return r;
-}
-
-/* i810_set_dac_channels
- *
- *  Configure the codec's multi-channel DACs
- *
- *  The logic is backwards. Setting the bit to 1 turns off the DAC. 
- *
- *  What about the ICH? We currently configure it using the
- *  SNDCTL_DSP_CHANNELS ioctl.  If we're turnning on the DAC, 
- *  does that imply that we want the ICH set to support
- *  these channels?
- *  
- *  TODO:
- *    vailidate that the codec really supports these DACs
- *    before turning them on. 
- */
-static void i810_set_dac_channels(struct i810_state *state, int channel)
-{
-	int	aud_reg;
-	struct ac97_codec *codec = state->card->ac97_codec[0];
-	
-	/* No codec, no setup */
-	
-	if(codec == NULL)
-		return;
-
-	aud_reg = i810_ac97_get(codec, AC97_EXTENDED_STATUS);
-	aud_reg |= AC97_EA_PRI | AC97_EA_PRJ | AC97_EA_PRK;
-	state->card->ac97_status &= ~(SURR_ON | CENTER_LFE_ON);
-
-	switch ( channel ) {
-		case 2: /* always enabled */
-			break;
-		case 4:
-			aud_reg &= ~AC97_EA_PRJ;
-			state->card->ac97_status |= SURR_ON;
-			break;
-		case 6:
-			aud_reg &= ~(AC97_EA_PRJ | AC97_EA_PRI | AC97_EA_PRK);
-			state->card->ac97_status |= SURR_ON | CENTER_LFE_ON;
-			break;
-		default:
-			break;
-	}
-	i810_ac97_set(codec, AC97_EXTENDED_STATUS, aud_reg);
-
-}
-
-
-/* set playback sample rate */
-static unsigned int i810_set_dac_rate(struct i810_state * state, unsigned int rate)
-{	
-	struct dmabuf *dmabuf = &state->dmabuf;
-	u32 new_rate;
-	struct ac97_codec *codec=state->card->ac97_codec[0];
-	
-	if(!(state->card->ac97_features&0x0001))
-	{
-		dmabuf->rate = clocking;
-#ifdef DEBUG
-		printk("Asked for %d Hz, but ac97_features says we only do %dHz.  Sorry!\n",
-		       rate,clocking);
-#endif		       
-		return clocking;
-	}
-			
-	if (rate > 48000)
-		rate = 48000;
-	if (rate < 8000)
-		rate = 8000;
-	dmabuf->rate = rate;
-		
-	/*
-	 *	Adjust for misclocked crap
-	 */
-	rate = ( rate * clocking)/48000;
-	if(strict_clocking && rate < 8000) {
-		rate = 8000;
-		dmabuf->rate = (rate * 48000)/clocking;
-	}
-
-        new_rate=ac97_set_dac_rate(codec, rate);
-	if(new_rate != rate) {
-		dmabuf->rate = (new_rate * 48000)/clocking;
-	}
-#ifdef DEBUG
-	printk("i810_audio: called i810_set_dac_rate : asked for %d, got %d\n", rate, dmabuf->rate);
-#endif
-	rate = new_rate;
-	return dmabuf->rate;
-}
-
-/* set recording sample rate */
-static unsigned int i810_set_adc_rate(struct i810_state * state, unsigned int rate)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	u32 new_rate;
-	struct ac97_codec *codec=state->card->ac97_codec[0];
-	
-	if(!(state->card->ac97_features&0x0001))
-	{
-		dmabuf->rate = clocking;
-		return clocking;
-	}
-			
-	if (rate > 48000)
-		rate = 48000;
-	if (rate < 8000)
-		rate = 8000;
-	dmabuf->rate = rate;
-
-	/*
-	 *	Adjust for misclocked crap
-	 */
-	 
-	rate = ( rate * clocking)/48000;
-	if(strict_clocking && rate < 8000) {
-		rate = 8000;
-		dmabuf->rate = (rate * 48000)/clocking;
-	}
-
-	new_rate = ac97_set_adc_rate(codec, rate);
-	
-	if(new_rate != rate) {
-		dmabuf->rate = (new_rate * 48000)/clocking;
-		rate = new_rate;
-	}
-#ifdef DEBUG
-	printk("i810_audio: called i810_set_adc_rate : rate = %d/%d\n", dmabuf->rate, rate);
-#endif
-	return dmabuf->rate;
-}
-
-/* get current playback/recording dma buffer pointer (byte offset from LBA),
-   called with spinlock held! */
-   
-static inline unsigned i810_get_dma_addr(struct i810_state *state, int rec)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned int civ, offset, port, port_picb, bytes = 2;
-	
-	if (!dmabuf->enable)
-		return 0;
-
-	if (rec)
-		port = dmabuf->read_channel->port;
-	else
-		port = dmabuf->write_channel->port;
-
-	if(state->card->pci_id == PCI_DEVICE_ID_SI_7012) {
-		port_picb = port + OFF_SR;
-		bytes = 1;
-	} else
-		port_picb = port + OFF_PICB;
-
-	do {
-		civ = GET_CIV(state->card, port);
-		offset = I810_IOREADW(state->card, port_picb);
-		/* Must have a delay here! */ 
-		if(offset == 0)
-			udelay(1);
-		/* Reread both registers and make sure that that total
-		 * offset from the first reading to the second is 0.
-		 * There is an issue with SiS hardware where it will count
-		 * picb down to 0, then update civ to the next value,
-		 * then set the new picb to fragsize bytes.  We can catch
-		 * it between the civ update and the picb update, making
-		 * it look as though we are 1 fragsize ahead of where we
-		 * are.  The next to we get the address though, it will
-		 * be back in the right place, and we will suddenly think
-		 * we just went forward dmasize - fragsize bytes, causing
-		 * totally stupid *huge* dma overrun messages.  We are
-		 * assuming that the 1us delay is more than long enough
-		 * that we won't have to worry about the chip still being
-		 * out of sync with reality ;-)
-		 */
-	} while (civ != GET_CIV(state->card, port) || offset != I810_IOREADW(state->card, port_picb));
-		 
-	return (((civ + 1) * dmabuf->fragsize - (bytes * offset))
-		% dmabuf->dmasize);
-}
-
-/* Stop recording (lock held) */
-static inline void __stop_adc(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	struct i810_card *card = state->card;
-
-	dmabuf->enable &= ~ADC_RUNNING;
-	I810_IOWRITEB(0, card, PI_CR);
-	// wait for the card to acknowledge shutdown
-	while( I810_IOREADB(card, PI_CR) != 0 ) ;
-	// now clear any latent interrupt bits (like the halt bit)
-	if(card->pci_id == PCI_DEVICE_ID_SI_7012)
-		I810_IOWRITEB( I810_IOREADB(card, PI_PICB), card, PI_PICB );
-	else
-		I810_IOWRITEB( I810_IOREADB(card, PI_SR), card, PI_SR );
-	I810_IOWRITEL( I810_IOREADL(card, GLOB_STA) & INT_PI, card, GLOB_STA);
-}
-
-static void stop_adc(struct i810_state *state)
-{
-	struct i810_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	__stop_adc(state);
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-static inline void __start_adc(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-
-	if (dmabuf->count < dmabuf->dmasize && dmabuf->ready && !dmabuf->enable &&
-	    (dmabuf->trigger & PCM_ENABLE_INPUT)) {
-		dmabuf->enable |= ADC_RUNNING;
-		// Interrupt enable, LVI enable, DMA enable
-		I810_IOWRITEB(0x10 | 0x04 | 0x01, state->card, PI_CR);
-	}
-}
-
-static void start_adc(struct i810_state *state)
-{
-	struct i810_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	__start_adc(state);
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-/* stop playback (lock held) */
-static inline void __stop_dac(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	struct i810_card *card = state->card;
-
-	dmabuf->enable &= ~DAC_RUNNING;
-	I810_IOWRITEB(0, card, PO_CR);
-	// wait for the card to acknowledge shutdown
-	while( I810_IOREADB(card, PO_CR) != 0 ) ;
-	// now clear any latent interrupt bits (like the halt bit)
-	if(card->pci_id == PCI_DEVICE_ID_SI_7012)
-		I810_IOWRITEB( I810_IOREADB(card, PO_PICB), card, PO_PICB );
-	else
-		I810_IOWRITEB( I810_IOREADB(card, PO_SR), card, PO_SR );
-	I810_IOWRITEL( I810_IOREADL(card, GLOB_STA) & INT_PO, card, GLOB_STA);
-}
-
-static void stop_dac(struct i810_state *state)
-{
-	struct i810_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	__stop_dac(state);
-	spin_unlock_irqrestore(&card->lock, flags);
-}	
-
-static inline void __start_dac(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-
-	if (dmabuf->count > 0 && dmabuf->ready && !dmabuf->enable &&
-	    (dmabuf->trigger & PCM_ENABLE_OUTPUT)) {
-		dmabuf->enable |= DAC_RUNNING;
-		// Interrupt enable, LVI enable, DMA enable
-		I810_IOWRITEB(0x10 | 0x04 | 0x01, state->card, PO_CR);
-	}
-}
-static void start_dac(struct i810_state *state)
-{
-	struct i810_card *card = state->card;
-	unsigned long flags;
-
-	spin_lock_irqsave(&card->lock, flags);
-	__start_dac(state);
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-#define DMABUF_DEFAULTORDER (16-PAGE_SHIFT)
-#define DMABUF_MINORDER 1
-
-/* allocate DMA buffer, playback and recording buffer should be allocated separately */
-static int alloc_dmabuf(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	void *rawbuf= NULL;
-	int order, size;
-	struct page *page, *pend;
-
-	/* If we don't have any oss frag params, then use our default ones */
-	if(dmabuf->ossmaxfrags == 0)
-		dmabuf->ossmaxfrags = 4;
-	if(dmabuf->ossfragsize == 0)
-		dmabuf->ossfragsize = (PAGE_SIZE<<DMABUF_DEFAULTORDER)/dmabuf->ossmaxfrags;
-	size = dmabuf->ossfragsize * dmabuf->ossmaxfrags;
-
-	if(dmabuf->rawbuf && (PAGE_SIZE << dmabuf->buforder) == size)
-		return 0;
-	/* alloc enough to satisfy the oss params */
-	for (order = DMABUF_DEFAULTORDER; order >= DMABUF_MINORDER; order--) {
-		if ( (PAGE_SIZE<<order) > size )
-			continue;
-		if ((rawbuf = pci_alloc_consistent(state->card->pci_dev,
-						   PAGE_SIZE << order,
-						   &dmabuf->dma_handle)))
-			break;
-	}
-	if (!rawbuf)
-		return -ENOMEM;
-
-
-#ifdef DEBUG
-	printk("i810_audio: allocated %ld (order = %d) bytes at %p\n",
-	       PAGE_SIZE << order, order, rawbuf);
-#endif
-
-	dmabuf->ready  = dmabuf->mapped = 0;
-	dmabuf->rawbuf = rawbuf;
-	dmabuf->buforder = order;
-	
-	/* now mark the pages as reserved; otherwise remap_pfn_range doesn't do what we want */
-	pend = virt_to_page(rawbuf + (PAGE_SIZE << order) - 1);
-	for (page = virt_to_page(rawbuf); page <= pend; page++)
-		SetPageReserved(page);
-
-	return 0;
-}
-
-/* free DMA buffer */
-static void dealloc_dmabuf(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	struct page *page, *pend;
-
-	if (dmabuf->rawbuf) {
-		/* undo marking the pages as reserved */
-		pend = virt_to_page(dmabuf->rawbuf + (PAGE_SIZE << dmabuf->buforder) - 1);
-		for (page = virt_to_page(dmabuf->rawbuf); page <= pend; page++)
-			ClearPageReserved(page);
-		pci_free_consistent(state->card->pci_dev, PAGE_SIZE << dmabuf->buforder,
-				    dmabuf->rawbuf, dmabuf->dma_handle);
-	}
-	dmabuf->rawbuf = NULL;
-	dmabuf->mapped = dmabuf->ready = 0;
-}
-
-static int prog_dmabuf(struct i810_state *state, unsigned rec)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	struct i810_channel *c;
-	struct sg_item *sg;
-	unsigned long flags;
-	int ret;
-	unsigned fragint;
-	int i;
-
-	spin_lock_irqsave(&state->card->lock, flags);
-	if(dmabuf->enable & DAC_RUNNING)
-		__stop_dac(state);
-	if(dmabuf->enable & ADC_RUNNING)
-		__stop_adc(state);
-	dmabuf->total_bytes = 0;
-	dmabuf->count = dmabuf->error = 0;
-	dmabuf->swptr = dmabuf->hwptr = 0;
-	spin_unlock_irqrestore(&state->card->lock, flags);
-
-	/* allocate DMA buffer, let alloc_dmabuf determine if we are already
-	 * allocated well enough or if we should replace the current buffer
-	 * (assuming one is already allocated, if it isn't, then allocate it).
-	 */
-	if ((ret = alloc_dmabuf(state)))
-		return ret;
-
-	/* FIXME: figure out all this OSS fragment stuff */
-	/* I did, it now does what it should according to the OSS API.  DL */
-	/* We may not have realloced our dmabuf, but the fragment size to
-	 * fragment number ratio may have changed, so go ahead and reprogram
-	 * things
-	 */
-	dmabuf->dmasize = PAGE_SIZE << dmabuf->buforder;
-	dmabuf->numfrag = SG_LEN;
-	dmabuf->fragsize = dmabuf->dmasize/dmabuf->numfrag;
-	dmabuf->fragsamples = dmabuf->fragsize >> 1;
-	dmabuf->fragshift = ffs(dmabuf->fragsize) - 1;
-	dmabuf->userfragsize = dmabuf->ossfragsize;
-	dmabuf->userfrags = dmabuf->dmasize/dmabuf->ossfragsize;
-
-	memset(dmabuf->rawbuf, 0, dmabuf->dmasize);
-
-	if(dmabuf->ossmaxfrags == 4) {
-		fragint = 8;
-	} else if (dmabuf->ossmaxfrags == 8) {
-		fragint = 4;
-	} else if (dmabuf->ossmaxfrags == 16) {
-		fragint = 2;
-	} else {
-		fragint = 1;
-	}
-	/*
-	 *	Now set up the ring 
-	 */
-	if(dmabuf->read_channel)
-		c = dmabuf->read_channel;
-	else
-		c = dmabuf->write_channel;
-	while(c != NULL) {
-		sg=&c->sg[0];
-		/*
-		 *	Load up 32 sg entries and take an interrupt at half
-		 *	way (we might want more interrupts later..) 
-		 */
-	  
-		for(i=0;i<dmabuf->numfrag;i++)
-		{
-			sg->busaddr=(u32)dmabuf->dma_handle+dmabuf->fragsize*i;
-			// the card will always be doing 16bit stereo
-			sg->control=dmabuf->fragsamples;
-			if(state->card->pci_id == PCI_DEVICE_ID_SI_7012)
-				sg->control <<= 1;
-			sg->control|=CON_BUFPAD;
-			// set us up to get IOC interrupts as often as needed to
-			// satisfy numfrag requirements, no more
-			if( ((i+1) % fragint) == 0) {
-				sg->control|=CON_IOC;
-			}
-			sg++;
-		}
-		spin_lock_irqsave(&state->card->lock, flags);
-		I810_IOWRITEB(2, state->card, c->port+OFF_CR);   /* reset DMA machine */
-		while( I810_IOREADB(state->card, c->port+OFF_CR) & 0x02 ) ;
-		I810_IOWRITEL((u32)state->card->chandma +
-		    c->num*sizeof(struct i810_channel),
-		    state->card, c->port+OFF_BDBAR);
-		CIV_TO_LVI(state->card, c->port, 0);
-
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-		if(c != dmabuf->write_channel)
-			c = dmabuf->write_channel;
-		else
-			c = NULL;
-	}
-	
-	/* set the ready flag for the dma buffer */
-	dmabuf->ready = 1;
-
-#ifdef DEBUG
-	printk("i810_audio: prog_dmabuf, sample rate = %d, format = %d,\n\tnumfrag = %d, "
-	       "fragsize = %d dmasize = %d\n",
-	       dmabuf->rate, dmabuf->fmt, dmabuf->numfrag,
-	       dmabuf->fragsize, dmabuf->dmasize);
-#endif
-
-	return 0;
-}
-
-static void __i810_update_lvi(struct i810_state *state, int rec)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	int x, port;
-	int trigger;
-	int count, fragsize;
-	void (*start)(struct i810_state *);
-
-	count = dmabuf->count;
-	if (rec) {
-		port = dmabuf->read_channel->port;
-		trigger = PCM_ENABLE_INPUT;
-		start = __start_adc;
-		count = dmabuf->dmasize - count;
-	} else {
-		port = dmabuf->write_channel->port;
-		trigger = PCM_ENABLE_OUTPUT;
-		start = __start_dac;
-	}
-
-	/* Do not process partial fragments. */
-	fragsize = dmabuf->fragsize;
-	if (count < fragsize)
-		return;
-
-	/* if we are currently stopped, then our CIV is actually set to our
-	 * *last* sg segment and we are ready to wrap to the next.  However,
-	 * if we set our LVI to the last sg segment, then it won't wrap to
-	 * the next sg segment, it won't even get a start.  So, instead, when
-	 * we are stopped, we set both the LVI value and also we increment
-	 * the CIV value to the next sg segment to be played so that when
-	 * we call start, things will operate properly.  Since the CIV can't
-	 * be written to directly for this purpose, we set the LVI to CIV + 1
-	 * temporarily.  Once the engine has started we set the LVI to its
-	 * final value.
-	 */
-	if (!dmabuf->enable && dmabuf->ready) {
-		if (!(dmabuf->trigger & trigger))
-			return;
-
-		CIV_TO_LVI(state->card, port, 1);
-
-		start(state);
-		while (!(I810_IOREADB(state->card, port + OFF_CR) & ((1<<4) | (1<<2))))
-			;
-	}
-
-	/* MASKP2(swptr, fragsize) - 1 is the tail of our transfer */
-	x = MODULOP2(MASKP2(dmabuf->swptr, fragsize) - 1, dmabuf->dmasize);
-	x >>= dmabuf->fragshift;
-	I810_IOWRITEB(x, state->card, port + OFF_LVI);
-}
-
-static void i810_update_lvi(struct i810_state *state, int rec)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-
-	if(!dmabuf->ready)
-		return;
-	spin_lock_irqsave(&state->card->lock, flags);
-	__i810_update_lvi(state, rec);
-	spin_unlock_irqrestore(&state->card->lock, flags);
-}
-
-/* update buffer manangement pointers, especially, dmabuf->count and dmabuf->hwptr */
-static void i810_update_ptr(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned hwptr;
-	unsigned fragmask, dmamask;
-	int diff;
-
-	fragmask = MASKP2(~0, dmabuf->fragsize);
-	dmamask = MODULOP2(~0, dmabuf->dmasize);
-
-	/* error handling and process wake up for ADC */
-	if (dmabuf->enable == ADC_RUNNING) {
-		/* update hardware pointer */
-		hwptr = i810_get_dma_addr(state, 1) & fragmask;
-		diff = (hwptr - dmabuf->hwptr) & dmamask;
-#if defined(DEBUG_INTERRUPTS) || defined(DEBUG_MMAP)
-		printk("ADC HWP %d,%d,%d\n", hwptr, dmabuf->hwptr, diff);
-#endif
-		dmabuf->hwptr = hwptr;
-		dmabuf->total_bytes += diff;
-		dmabuf->count += diff;
-		if (dmabuf->count > dmabuf->dmasize) {
-			/* buffer underrun or buffer overrun */
-			/* this is normal for the end of a read */
-			/* only give an error if we went past the */
-			/* last valid sg entry */
-			if (GET_CIV(state->card, PI_BASE) !=
-			    GET_LVI(state->card, PI_BASE)) {
-				printk(KERN_WARNING "i810_audio: DMA overrun on read\n");
-				dmabuf->error++;
-			}
-		}
-		if (diff)
-			wake_up(&dmabuf->wait);
-	}
-	/* error handling and process wake up for DAC */
-	if (dmabuf->enable == DAC_RUNNING) {
-		/* update hardware pointer */
-		hwptr = i810_get_dma_addr(state, 0) & fragmask;
-		diff = (hwptr - dmabuf->hwptr) & dmamask;
-#if defined(DEBUG_INTERRUPTS) || defined(DEBUG_MMAP)
-		printk("DAC HWP %d,%d,%d\n", hwptr, dmabuf->hwptr, diff);
-#endif
-		dmabuf->hwptr = hwptr;
-		dmabuf->total_bytes += diff;
-		dmabuf->count -= diff;
-		if (dmabuf->count < 0) {
-			/* buffer underrun or buffer overrun */
-			/* this is normal for the end of a write */
-			/* only give an error if we went past the */
-			/* last valid sg entry */
-			if (GET_CIV(state->card, PO_BASE) !=
-			    GET_LVI(state->card, PO_BASE)) {
-				printk(KERN_WARNING "i810_audio: DMA overrun on write\n");
-				printk("i810_audio: CIV %d, LVI %d, hwptr %x, "
-					"count %d\n",
-					GET_CIV(state->card, PO_BASE),
-					GET_LVI(state->card, PO_BASE),
-					dmabuf->hwptr, dmabuf->count);
-				dmabuf->error++;
-			}
-		}
-		if (diff)
-			wake_up(&dmabuf->wait);
-	}
-}
-
-static inline int i810_get_free_write_space(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	int free;
-
-	i810_update_ptr(state);
-	// catch underruns during playback
-	if (dmabuf->count < 0) {
-		dmabuf->count = 0;
-		dmabuf->swptr = dmabuf->hwptr;
-	}
-	free = dmabuf->dmasize - dmabuf->count;
-	if(free < 0)
-		return(0);
-	return(free);
-}
-
-static inline int i810_get_available_read_data(struct i810_state *state)
-{
-	struct dmabuf *dmabuf = &state->dmabuf;
-	int avail;
-
-	i810_update_ptr(state);
-	// catch overruns during record
-	if (dmabuf->count > dmabuf->dmasize) {
-		dmabuf->count = dmabuf->dmasize;
-		dmabuf->swptr = dmabuf->hwptr;
-	}
-	avail = dmabuf->count;
-	if(avail < 0)
-		return(0);
-	return(avail);
-}
-
-static inline void fill_partial_frag(struct dmabuf *dmabuf)
-{
-	unsigned fragsize;
-	unsigned swptr, len;
-
-	fragsize = dmabuf->fragsize;
-	swptr = dmabuf->swptr;
-	len = fragsize - MODULOP2(dmabuf->swptr, fragsize);
-	if (len == fragsize)
-		return;
-
-	memset(dmabuf->rawbuf + swptr, '\0', len);
-	dmabuf->swptr = MODULOP2(swptr + len, dmabuf->dmasize);
-	dmabuf->count += len;
-}
-
-static int drain_dac(struct i810_state *state, int signals_allowed)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-	unsigned long tmo;
-	int count;
-
-	if (!dmabuf->ready)
-		return 0;
-	if(dmabuf->mapped) {
-		stop_dac(state);
-		return 0;
-	}
-
-	spin_lock_irqsave(&state->card->lock, flags);
-
-	fill_partial_frag(dmabuf);
-
-	/* 
-	 * This will make sure that our LVI is correct, that our
-	 * pointer is updated, and that the DAC is running.  We
-	 * have to force the setting of dmabuf->trigger to avoid
-	 * any possible deadlocks.
-	 */
-	dmabuf->trigger = PCM_ENABLE_OUTPUT;
-	__i810_update_lvi(state, 0);
-
-	spin_unlock_irqrestore(&state->card->lock, flags);
-
-	add_wait_queue(&dmabuf->wait, &wait);
-	for (;;) {
-
-		spin_lock_irqsave(&state->card->lock, flags);
-		i810_update_ptr(state);
-		count = dmabuf->count;
-
-		/* It seems that we have to set the current state to
-		 * TASK_INTERRUPTIBLE every time to make the process
-		 * really go to sleep.  This also has to be *after* the
-		 * update_ptr() call because update_ptr is likely to
-		 * do a wake_up() which will unset this before we ever
-		 * try to sleep, resuling in a tight loop in this code
-		 * instead of actually sleeping and waiting for an
-		 * interrupt to wake us up!
-		 */
-		__set_current_state(signals_allowed ?
-				    TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-		if (count <= 0)
-			break;
-
-                if (signal_pending(current) && signals_allowed) {
-                        break;
-                }
-
-		/*
-		 * set the timeout to significantly longer than it *should*
-		 * take for the DAC to drain the DMA buffer
-		 */
-		tmo = (count * HZ) / (dmabuf->rate);
-		if (!schedule_timeout(tmo >= 2 ? tmo : 2)){
-			printk(KERN_ERR "i810_audio: drain_dac, dma timeout?\n");
-			count = 0;
-			break;
-		}
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&dmabuf->wait, &wait);
-	if(count > 0 && signal_pending(current) && signals_allowed)
-		return -ERESTARTSYS;
-	stop_dac(state);
-	return 0;
-}
-
-static void i810_channel_interrupt(struct i810_card *card)
-{
-	int i, count;
-	
-#ifdef DEBUG_INTERRUPTS
-	printk("CHANNEL ");
-#endif
-	for(i=0;i<NR_HW_CH;i++)
-	{
-		struct i810_state *state = card->states[i];
-		struct i810_channel *c;
-		struct dmabuf *dmabuf;
-		unsigned long port;
-		u16 status;
-		
-		if(!state)
-			continue;
-		if(!state->dmabuf.ready)
-			continue;
-		dmabuf = &state->dmabuf;
-		if(dmabuf->enable & DAC_RUNNING) {
-			c=dmabuf->write_channel;
-		} else if(dmabuf->enable & ADC_RUNNING) {
-			c=dmabuf->read_channel;
-		} else	/* This can occur going from R/W to close */
-			continue;
-		
-		port = c->port;
-
-		if(card->pci_id == PCI_DEVICE_ID_SI_7012)
-			status = I810_IOREADW(card, port + OFF_PICB);
-		else
-			status = I810_IOREADW(card, port + OFF_SR);
-
-#ifdef DEBUG_INTERRUPTS
-		printk("NUM %d PORT %X IRQ ( ST%d ", c->num, c->port, status);
-#endif
-		if(status & DMA_INT_COMPLETE)
-		{
-			/* only wake_up() waiters if this interrupt signals
-			 * us being beyond a userfragsize of data open or
-			 * available, and i810_update_ptr() does that for
-			 * us
-			 */
-			i810_update_ptr(state);
-#ifdef DEBUG_INTERRUPTS
-			printk("COMP %d ", dmabuf->hwptr /
-					dmabuf->fragsize);
-#endif
-		}
-		if(status & (DMA_INT_LVI | DMA_INT_DCH))
-		{
-			/* wake_up() unconditionally on LVI and DCH */
-			i810_update_ptr(state);
-			wake_up(&dmabuf->wait);
-#ifdef DEBUG_INTERRUPTS
-			if(status & DMA_INT_LVI)
-				printk("LVI ");
-			if(status & DMA_INT_DCH)
-				printk("DCH -");
-#endif
-			count = dmabuf->count;
-			if(dmabuf->enable & ADC_RUNNING)
-				count = dmabuf->dmasize - count;
-			if (count >= (int)dmabuf->fragsize) {
-				I810_IOWRITEB(I810_IOREADB(card, port+OFF_CR) | 1, card, port+OFF_CR);
-#ifdef DEBUG_INTERRUPTS
-				printk(" CONTINUE ");
-#endif
-			} else {
-				if (dmabuf->enable & DAC_RUNNING)
-					__stop_dac(state);
-				if (dmabuf->enable & ADC_RUNNING)
-					__stop_adc(state);
-				dmabuf->enable = 0;
-#ifdef DEBUG_INTERRUPTS
-				printk(" STOP ");
-#endif
-			}
-		}
-		if(card->pci_id == PCI_DEVICE_ID_SI_7012)
-			I810_IOWRITEW(status & DMA_INT_MASK, card, port + OFF_PICB);
-		else
-			I810_IOWRITEW(status & DMA_INT_MASK, card, port + OFF_SR);
-	}
-#ifdef DEBUG_INTERRUPTS
-	printk(")\n");
-#endif
-}
-
-static irqreturn_t i810_interrupt(int irq, void *dev_id)
-{
-	struct i810_card *card = dev_id;
-	u32 status;
-
-	spin_lock(&card->lock);
-
-	status = I810_IOREADL(card, GLOB_STA);
-
-	if(!(status & INT_MASK)) 
-	{
-		spin_unlock(&card->lock);
-		return IRQ_NONE;  /* not for us */
-	}
-
-	if(status & (INT_PO|INT_PI|INT_MC))
-		i810_channel_interrupt(card);
-
- 	/* clear 'em */
-	I810_IOWRITEL(status & INT_MASK, card, GLOB_STA);
-	spin_unlock(&card->lock);
-	return IRQ_HANDLED;
-}
-
-/* in this loop, dmabuf.count signifies the amount of data that is
-   waiting to be copied to the user's buffer.  It is filled by the dma
-   machine and drained by this loop. */
-
-static ssize_t i810_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
-{
-	struct i810_state *state = (struct i810_state *)file->private_data;
-	struct i810_card *card=state ? state->card : NULL;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	ssize_t ret;
-	unsigned long flags;
-	unsigned int swptr;
-	int cnt;
-	int pending;
-        DECLARE_WAITQUEUE(waita, current);
-
-#ifdef DEBUG2
-	printk("i810_audio: i810_read called, count = %d\n", count);
-#endif
-
-	if (dmabuf->mapped)
-		return -ENXIO;
-	if (dmabuf->enable & DAC_RUNNING)
-		return -ENODEV;
-	if (!dmabuf->read_channel) {
-		dmabuf->ready = 0;
-		dmabuf->read_channel = card->alloc_rec_pcm_channel(card);
-		if (!dmabuf->read_channel) {
-			return -EBUSY;
-		}
-	}
-	if (!dmabuf->ready && (ret = prog_dmabuf(state, 1)))
-		return ret;
-	if (!access_ok(VERIFY_WRITE, buffer, count))
-		return -EFAULT;
-	ret = 0;
-
-	pending = 0;
-
-        add_wait_queue(&dmabuf->wait, &waita);
-	while (count > 0) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		spin_lock_irqsave(&card->lock, flags);
-                if (PM_SUSPENDED(card)) {
-                        spin_unlock_irqrestore(&card->lock, flags);
-                        schedule();
-                        if (signal_pending(current)) {
-                                if (!ret) ret = -EAGAIN;
-                                break;
-                        }
-                        continue;
-                }
-		cnt = i810_get_available_read_data(state);
-		swptr = dmabuf->swptr;
-		// this is to make the copy_to_user simpler below
-		if(cnt > (dmabuf->dmasize - swptr))
-			cnt = dmabuf->dmasize - swptr;
-		spin_unlock_irqrestore(&card->lock, flags);
-
-		if (cnt > count)
-			cnt = count;
-		if (cnt <= 0) {
-			unsigned long tmo;
-			/*
-			 * Don't let us deadlock.  The ADC won't start if
-			 * dmabuf->trigger isn't set.  A call to SETTRIGGER
-			 * could have turned it off after we set it to on
-			 * previously.
-			 */
-			dmabuf->trigger = PCM_ENABLE_INPUT;
-			/*
-			 * This does three things.  Updates LVI to be correct,
-			 * makes sure the ADC is running, and updates the
-			 * hwptr.
-			 */
-			i810_update_lvi(state,1);
-			if (file->f_flags & O_NONBLOCK) {
-				if (!ret) ret = -EAGAIN;
-				goto done;
-			}
-			/* Set the timeout to how long it would take to fill
-			 * two of our buffers.  If we haven't been woke up
-			 * by then, then we know something is wrong.
-			 */
-			tmo = (dmabuf->dmasize * HZ * 2) / (dmabuf->rate * 4);
-			/* There are two situations when sleep_on_timeout returns, one is when
-			   the interrupt is serviced correctly and the process is waked up by
-			   ISR ON TIME. Another is when timeout is expired, which means that
-			   either interrupt is NOT serviced correctly (pending interrupt) or it
-			   is TOO LATE for the process to be scheduled to run (scheduler latency)
-			   which results in a (potential) buffer overrun. And worse, there is
-			   NOTHING we can do to prevent it. */
-			if (!schedule_timeout(tmo >= 2 ? tmo : 2)) {
-#ifdef DEBUG
-				printk(KERN_ERR "i810_audio: recording schedule timeout, "
-				       "dmasz %u fragsz %u count %i hwptr %u swptr %u\n",
-				       dmabuf->dmasize, dmabuf->fragsize, dmabuf->count,
-				       dmabuf->hwptr, dmabuf->swptr);
-#endif
-				/* a buffer overrun, we delay the recovery until next time the
-				   while loop begin and we REALLY have space to record */
-			}
-			if (signal_pending(current)) {
-				ret = ret ? ret : -ERESTARTSYS;
-				goto done;
-			}
-			continue;
-		}
-
-		if (copy_to_user(buffer, dmabuf->rawbuf + swptr, cnt)) {
-			if (!ret) ret = -EFAULT;
-			goto done;
-		}
-
-		swptr = MODULOP2(swptr + cnt, dmabuf->dmasize);
-
-		spin_lock_irqsave(&card->lock, flags);
-
-                if (PM_SUSPENDED(card)) {
-                        spin_unlock_irqrestore(&card->lock, flags);
-                        continue;
-                }
-		dmabuf->swptr = swptr;
-		pending = dmabuf->count -= cnt;
-		spin_unlock_irqrestore(&card->lock, flags);
-
-		count -= cnt;
-		buffer += cnt;
-		ret += cnt;
-	}
- done:
-	pending = dmabuf->dmasize - pending;
-	if (dmabuf->enable || pending >= dmabuf->userfragsize)
-		i810_update_lvi(state, 1);
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&dmabuf->wait, &waita);
-
-	return ret;
-}
-
-/* in this loop, dmabuf.count signifies the amount of data that is waiting to be dma to
-   the soundcard.  it is drained by the dma machine and filled by this loop. */
-static ssize_t i810_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
-{
-	struct i810_state *state = (struct i810_state *)file->private_data;
-	struct i810_card *card=state ? state->card : NULL;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	ssize_t ret;
-	unsigned long flags;
-	unsigned int swptr = 0;
-	int pending;
-	int cnt;
-        DECLARE_WAITQUEUE(waita, current);
-
-#ifdef DEBUG2
-	printk("i810_audio: i810_write called, count = %d\n", count);
-#endif
-
-	if (dmabuf->mapped)
-		return -ENXIO;
-	if (dmabuf->enable & ADC_RUNNING)
-		return -ENODEV;
-	if (!dmabuf->write_channel) {
-		dmabuf->ready = 0;
-		dmabuf->write_channel = card->alloc_pcm_channel(card);
-		if(!dmabuf->write_channel)
-			return -EBUSY;
-	}
-	if (!dmabuf->ready && (ret = prog_dmabuf(state, 0)))
-		return ret;
-	if (!access_ok(VERIFY_READ, buffer, count))
-		return -EFAULT;
-	ret = 0;
-
-	pending = 0;
-
-        add_wait_queue(&dmabuf->wait, &waita);
-	while (count > 0) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		spin_lock_irqsave(&state->card->lock, flags);
-                if (PM_SUSPENDED(card)) {
-                        spin_unlock_irqrestore(&card->lock, flags);
-                        schedule();
-                        if (signal_pending(current)) {
-                                if (!ret) ret = -EAGAIN;
-                                break;
-                        }
-                        continue;
-                }
-
-		cnt = i810_get_free_write_space(state);
-		swptr = dmabuf->swptr;
-		/* Bound the maximum size to how much we can copy to the
-		 * dma buffer before we hit the end.  If we have more to
-		 * copy then it will get done in a second pass of this
-		 * loop starting from the beginning of the buffer.
-		 */
-		if(cnt > (dmabuf->dmasize - swptr))
-			cnt = dmabuf->dmasize - swptr;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-
-#ifdef DEBUG2
-		printk(KERN_INFO "i810_audio: i810_write: %d bytes available space\n", cnt);
-#endif
-		if (cnt > count)
-			cnt = count;
-		if (cnt <= 0) {
-			unsigned long tmo;
-			// There is data waiting to be played
-			/*
-			 * Force the trigger setting since we would
-			 * deadlock with it set any other way
-			 */
-			dmabuf->trigger = PCM_ENABLE_OUTPUT;
-			i810_update_lvi(state,0);
-			if (file->f_flags & O_NONBLOCK) {
-				if (!ret) ret = -EAGAIN;
-				goto ret;
-			}
-			/* Not strictly correct but works */
-			tmo = (dmabuf->dmasize * HZ * 2) / (dmabuf->rate * 4);
-			/* There are two situations when sleep_on_timeout returns, one is when
-			   the interrupt is serviced correctly and the process is waked up by
-			   ISR ON TIME. Another is when timeout is expired, which means that
-			   either interrupt is NOT serviced correctly (pending interrupt) or it
-			   is TOO LATE for the process to be scheduled to run (scheduler latency)
-			   which results in a (potential) buffer underrun. And worse, there is
-			   NOTHING we can do to prevent it. */
-			if (!schedule_timeout(tmo >= 2 ? tmo : 2)) {
-#ifdef DEBUG
-				printk(KERN_ERR "i810_audio: playback schedule timeout, "
-				       "dmasz %u fragsz %u count %i hwptr %u swptr %u\n",
-				       dmabuf->dmasize, dmabuf->fragsize, dmabuf->count,
-				       dmabuf->hwptr, dmabuf->swptr);
-#endif
-				/* a buffer underrun, we delay the recovery until next time the
-				   while loop begin and we REALLY have data to play */
-				//return ret;
-			}
-			if (signal_pending(current)) {
-				if (!ret) ret = -ERESTARTSYS;
-				goto ret;
-			}
-			continue;
-		}
-		if (copy_from_user(dmabuf->rawbuf+swptr,buffer,cnt)) {
-			if (!ret) ret = -EFAULT;
-			goto ret;
-		}
-
-		swptr = MODULOP2(swptr + cnt, dmabuf->dmasize);
-
-		spin_lock_irqsave(&state->card->lock, flags);
-                if (PM_SUSPENDED(card)) {
-                        spin_unlock_irqrestore(&card->lock, flags);
-                        continue;
-                }
-
-		dmabuf->swptr = swptr;
-		pending = dmabuf->count += cnt;
-
-		count -= cnt;
-		buffer += cnt;
-		ret += cnt;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-	}
-ret:
-	if (dmabuf->enable || pending >= dmabuf->userfragsize)
-		i810_update_lvi(state, 0);
-        set_current_state(TASK_RUNNING);
-        remove_wait_queue(&dmabuf->wait, &waita);
-
-	return ret;
-}
-
-/* No kernel lock - we have our own spinlock */
-static unsigned int i810_poll(struct file *file, struct poll_table_struct *wait)
-{
-	struct i810_state *state = (struct i810_state *)file->private_data;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-	unsigned int mask = 0;
-
-	if(!dmabuf->ready)
-		return 0;
-	poll_wait(file, &dmabuf->wait, wait);
-	spin_lock_irqsave(&state->card->lock, flags);
-	if (dmabuf->enable & ADC_RUNNING ||
-	    dmabuf->trigger & PCM_ENABLE_INPUT) {
-		if (i810_get_available_read_data(state) >= 
-		    (signed)dmabuf->userfragsize)
-			mask |= POLLIN | POLLRDNORM;
-	}
-	if (dmabuf->enable & DAC_RUNNING ||
-	    dmabuf->trigger & PCM_ENABLE_OUTPUT) {
-		if (i810_get_free_write_space(state) >=
-		    (signed)dmabuf->userfragsize)
-			mask |= POLLOUT | POLLWRNORM;
-	}
-	spin_unlock_irqrestore(&state->card->lock, flags);
-	return mask;
-}
-
-static int i810_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct i810_state *state = (struct i810_state *)file->private_data;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	int ret = -EINVAL;
-	unsigned long size;
-
-	lock_kernel();
-	if (vma->vm_flags & VM_WRITE) {
-		if (!dmabuf->write_channel &&
-		    (dmabuf->write_channel =
-		     state->card->alloc_pcm_channel(state->card)) == NULL) {
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-	if (vma->vm_flags & VM_READ) {
-		if (!dmabuf->read_channel &&
-		    (dmabuf->read_channel = 
-		     state->card->alloc_rec_pcm_channel(state->card)) == NULL) {
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-	if ((ret = prog_dmabuf(state, 0)) != 0)
-		goto out;
-
-	ret = -EINVAL;
-	if (vma->vm_pgoff != 0)
-		goto out;
-	size = vma->vm_end - vma->vm_start;
-	if (size > (PAGE_SIZE << dmabuf->buforder))
-		goto out;
-	ret = -EAGAIN;
-	if (remap_pfn_range(vma, vma->vm_start,
-			     virt_to_phys(dmabuf->rawbuf) >> PAGE_SHIFT,
-			     size, vma->vm_page_prot))
-		goto out;
-	dmabuf->mapped = 1;
-	dmabuf->trigger = 0;
-	ret = 0;
-#ifdef DEBUG_MMAP
-	printk("i810_audio: mmap'ed %ld bytes of data space\n", size);
-#endif
-out:
-	unlock_kernel();
-	return ret;
-}
-
-static int i810_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct i810_state *state = (struct i810_state *)file->private_data;
-	struct i810_channel *c = NULL;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-	audio_buf_info abinfo;
-	count_info cinfo;
-	unsigned int i_glob_cnt;
-	int val = 0, ret;
-	struct ac97_codec *codec = state->card->ac97_codec[0];
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-
-#ifdef DEBUG
-	printk("i810_audio: i810_ioctl, arg=0x%x, cmd=", arg ? *p : 0);
-#endif
-
-	switch (cmd) 
-	{
-	case OSS_GETVERSION:
-#ifdef DEBUG
-		printk("OSS_GETVERSION\n");
-#endif
-		return put_user(SOUND_VERSION, p);
-
-	case SNDCTL_DSP_RESET:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_RESET\n");
-#endif
-		spin_lock_irqsave(&state->card->lock, flags);
-		if (dmabuf->enable == DAC_RUNNING) {
-			c = dmabuf->write_channel;
-			__stop_dac(state);
-		}
-		if (dmabuf->enable == ADC_RUNNING) {
-			c = dmabuf->read_channel;
-			__stop_adc(state);
-		}
-		if (c != NULL) {
-			I810_IOWRITEB(2, state->card, c->port+OFF_CR);   /* reset DMA machine */
-			while ( I810_IOREADB(state->card, c->port+OFF_CR) & 2 )
-				cpu_relax();
-			I810_IOWRITEL((u32)state->card->chandma +
-			    c->num*sizeof(struct i810_channel),
-			    state->card, c->port+OFF_BDBAR);
-			CIV_TO_LVI(state->card, c->port, 0);
-		}
-
-		spin_unlock_irqrestore(&state->card->lock, flags);
-		synchronize_irq(state->card->pci_dev->irq);
-		dmabuf->ready = 0;
-		dmabuf->swptr = dmabuf->hwptr = 0;
-		dmabuf->count = dmabuf->total_bytes = 0;
-		return 0;
-
-	case SNDCTL_DSP_SYNC:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_SYNC\n");
-#endif
-		if (dmabuf->enable != DAC_RUNNING || file->f_flags & O_NONBLOCK)
-			return 0;
-		if((val = drain_dac(state, 1)))
-			return val;
-		dmabuf->total_bytes = 0;
-		return 0;
-
-	case SNDCTL_DSP_SPEED: /* set smaple rate */
-#ifdef DEBUG
-		printk("SNDCTL_DSP_SPEED\n");
-#endif
-		if (get_user(val, p))
-			return -EFAULT;
-		if (val >= 0) {
-			if (file->f_mode & FMODE_WRITE) {
-				if ( (state->card->ac97_status & SPDIF_ON) ) {  /* S/PDIF Enabled */
-					/* AD1886 only supports 48000, need to check that */
-					if ( i810_valid_spdif_rate ( codec, val ) ) {
-						/* Set DAC rate */
-                                        	i810_set_spdif_output ( state, -1, 0 );
-						stop_dac(state);
-						dmabuf->ready = 0;
-						spin_lock_irqsave(&state->card->lock, flags);
-						i810_set_dac_rate(state, val);
-						spin_unlock_irqrestore(&state->card->lock, flags);
-						/* Set S/PDIF transmitter rate. */
-						i810_set_spdif_output ( state, AC97_EA_SPSA_3_4, val );
-	                                        if ( ! (state->card->ac97_status & SPDIF_ON) ) {
-							val = dmabuf->rate;
-						}
-					} else { /* Not a valid rate for S/PDIF, ignore it */
-						val = dmabuf->rate;
-					}
-				} else {
-					stop_dac(state);
-					dmabuf->ready = 0;
-					spin_lock_irqsave(&state->card->lock, flags);
-					i810_set_dac_rate(state, val);
-					spin_unlock_irqrestore(&state->card->lock, flags);
-				}
-			}
-			if (file->f_mode & FMODE_READ) {
-				stop_adc(state);
-				dmabuf->ready = 0;
-				spin_lock_irqsave(&state->card->lock, flags);
-				i810_set_adc_rate(state, val);
-				spin_unlock_irqrestore(&state->card->lock, flags);
-			}
-		}
-		return put_user(dmabuf->rate, p);
-
-	case SNDCTL_DSP_STEREO: /* set stereo or mono channel */
-#ifdef DEBUG
-		printk("SNDCTL_DSP_STEREO\n");
-#endif
-		if (dmabuf->enable & DAC_RUNNING) {
-			stop_dac(state);
-		}
-		if (dmabuf->enable & ADC_RUNNING) {
-			stop_adc(state);
-		}
-		return put_user(1, p);
-
-	case SNDCTL_DSP_GETBLKSIZE:
-		if (file->f_mode & FMODE_WRITE) {
-			if (!dmabuf->ready && (val = prog_dmabuf(state, 0)))
-				return val;
-		}
-		if (file->f_mode & FMODE_READ) {
-			if (!dmabuf->ready && (val = prog_dmabuf(state, 1)))
-				return val;
-		}
-#ifdef DEBUG
-		printk("SNDCTL_DSP_GETBLKSIZE %d\n", dmabuf->userfragsize);
-#endif
-		return put_user(dmabuf->userfragsize, p);
-
-	case SNDCTL_DSP_GETFMTS: /* Returns a mask of supported sample format*/
-#ifdef DEBUG
-		printk("SNDCTL_DSP_GETFMTS\n");
-#endif
-		return put_user(AFMT_S16_LE, p);
-
-	case SNDCTL_DSP_SETFMT: /* Select sample format */
-#ifdef DEBUG
-		printk("SNDCTL_DSP_SETFMT\n");
-#endif
-		return put_user(AFMT_S16_LE, p);
-
-	case SNDCTL_DSP_CHANNELS:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_CHANNELS\n");
-#endif
-		if (get_user(val, p))
-			return -EFAULT;
-
-		if (val > 0) {
-			if (dmabuf->enable & DAC_RUNNING) {
-				stop_dac(state);
-			}
-			if (dmabuf->enable & ADC_RUNNING) {
-				stop_adc(state);
-			}
-		} else {
-			return put_user(state->card->channels, p);
-		}
-
-		/* ICH and ICH0 only support 2 channels */
-		if ( state->card->pci_id == PCI_DEVICE_ID_INTEL_82801AA_5
-		     || state->card->pci_id == PCI_DEVICE_ID_INTEL_82801AB_5) 
-			return put_user(2, p);
-	
-		/* Multi-channel support was added with ICH2. Bits in */
-		/* Global Status and Global Control register are now  */
-		/* used to indicate this.                             */
-
-                i_glob_cnt = I810_IOREADL(state->card, GLOB_CNT);
-
-		/* Current # of channels enabled */
-		if ( i_glob_cnt & 0x0100000 )
-			ret = 4;
-		else if ( i_glob_cnt & 0x0200000 )
-			ret = 6;
-		else
-			ret = 2;
-
-		switch ( val ) {
-			case 2: /* 2 channels is always supported */
-				I810_IOWRITEL(i_glob_cnt & 0xffcfffff,
-				     state->card, GLOB_CNT);
-				/* Do we need to change mixer settings????  */
-				break;
-			case 4: /* Supported on some chipsets, better check first */
-				if ( state->card->channels >= 4 ) {
-					I810_IOWRITEL((i_glob_cnt & 0xffcfffff) | 0x100000,
-					      state->card, GLOB_CNT);
-					/* Do we need to change mixer settings??? */
-				} else {
-					val = ret;
-				}
-				break;
-			case 6: /* Supported on some chipsets, better check first */
-				if ( state->card->channels >= 6 ) {
-					I810_IOWRITEL((i_glob_cnt & 0xffcfffff) | 0x200000,
-					      state->card, GLOB_CNT);
-					/* Do we need to change mixer settings??? */
-				} else {
-					val = ret;
-				}
-				break;
-			default: /* nothing else is ever supported by the chipset */
-				val = ret;
-				break;
-		}
-
-		return put_user(val, p);
-
-	case SNDCTL_DSP_POST: /* the user has sent all data and is notifying us */
-		/* we update the swptr to the end of the last sg segment then return */
-#ifdef DEBUG
-		printk("SNDCTL_DSP_POST\n");
-#endif
-		if(!dmabuf->ready || (dmabuf->enable != DAC_RUNNING))
-			return 0;
-		if((dmabuf->swptr % dmabuf->fragsize) != 0) {
-			val = dmabuf->fragsize - (dmabuf->swptr % dmabuf->fragsize);
-			dmabuf->swptr += val;
-			dmabuf->count += val;
-		}
-		return 0;
-
-	case SNDCTL_DSP_SUBDIVIDE:
-		if (dmabuf->subdivision)
-			return -EINVAL;
-		if (get_user(val, p))
-			return -EFAULT;
-		if (val != 1 && val != 2 && val != 4)
-			return -EINVAL;
-#ifdef DEBUG
-		printk("SNDCTL_DSP_SUBDIVIDE %d\n", val);
-#endif
-		dmabuf->subdivision = val;
-		dmabuf->ready = 0;
-		return 0;
-
-	case SNDCTL_DSP_SETFRAGMENT:
-		if (get_user(val, p))
-			return -EFAULT;
-
-		dmabuf->ossfragsize = 1<<(val & 0xffff);
-		dmabuf->ossmaxfrags = (val >> 16) & 0xffff;
-		if (!dmabuf->ossfragsize || !dmabuf->ossmaxfrags)
-			return -EINVAL;
-		/*
-		 * Bound the frag size into our allowed range of 256 - 4096
-		 */
-		if (dmabuf->ossfragsize < 256)
-			dmabuf->ossfragsize = 256;
-		else if (dmabuf->ossfragsize > 4096)
-			dmabuf->ossfragsize = 4096;
-		/*
-		 * The numfrags could be something reasonable, or it could
-		 * be 0xffff meaning "Give me as much as possible".  So,
-		 * we check the numfrags * fragsize doesn't exceed our
-		 * 64k buffer limit, nor is it less than our 8k minimum.
-		 * If it fails either one of these checks, then adjust the
-		 * number of fragments, not the size of them.  It's OK if
-		 * our number of fragments doesn't equal 32 or anything
-		 * like our hardware based number now since we are using
-		 * a different frag count for the hardware.  Before we get
-		 * into this though, bound the maxfrags to avoid overflow
-		 * issues.  A reasonable bound would be 64k / 256 since our
-		 * maximum buffer size is 64k and our minimum frag size is
-		 * 256.  On the other end, our minimum buffer size is 8k and
-		 * our maximum frag size is 4k, so the lower bound should
-		 * be 2.
-		 */
-
-		if(dmabuf->ossmaxfrags > 256)
-			dmabuf->ossmaxfrags = 256;
-		else if (dmabuf->ossmaxfrags < 2)
-			dmabuf->ossmaxfrags = 2;
-
-		val = dmabuf->ossfragsize * dmabuf->ossmaxfrags;
-		while (val < 8192) {
-		    val <<= 1;
-		    dmabuf->ossmaxfrags <<= 1;
-		}
-		while (val > 65536) {
-		    val >>= 1;
-		    dmabuf->ossmaxfrags >>= 1;
-		}
-		dmabuf->ready = 0;
-#ifdef DEBUG
-		printk("SNDCTL_DSP_SETFRAGMENT 0x%x, %d, %d\n", val,
-			dmabuf->ossfragsize, dmabuf->ossmaxfrags);
-#endif
-
-		return 0;
-
-	case SNDCTL_DSP_GETOSPACE:
-		if (!(file->f_mode & FMODE_WRITE))
-			return -EINVAL;
-		if (!dmabuf->ready && (val = prog_dmabuf(state, 0)) != 0)
-			return val;
-		spin_lock_irqsave(&state->card->lock, flags);
-		i810_update_ptr(state);
-		abinfo.fragsize = dmabuf->userfragsize;
-		abinfo.fragstotal = dmabuf->userfrags;
-		if (dmabuf->mapped)
- 			abinfo.bytes = dmabuf->dmasize;
-  		else
- 			abinfo.bytes = i810_get_free_write_space(state);
-		abinfo.fragments = abinfo.bytes / dmabuf->userfragsize;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-#if defined(DEBUG) || defined(DEBUG_MMAP)
-		printk("SNDCTL_DSP_GETOSPACE %d, %d, %d, %d\n", abinfo.bytes,
-			abinfo.fragsize, abinfo.fragments, abinfo.fragstotal);
-#endif
-		return copy_to_user(argp, &abinfo, sizeof(abinfo)) ? -EFAULT : 0;
-
-	case SNDCTL_DSP_GETOPTR:
-		if (!(file->f_mode & FMODE_WRITE))
-			return -EINVAL;
-		if (!dmabuf->ready && (val = prog_dmabuf(state, 0)) != 0)
-			return val;
-		spin_lock_irqsave(&state->card->lock, flags);
-		val = i810_get_free_write_space(state);
-		cinfo.bytes = dmabuf->total_bytes;
-		cinfo.ptr = dmabuf->hwptr;
-		cinfo.blocks = val/dmabuf->userfragsize;
-		if (dmabuf->mapped && (dmabuf->trigger & PCM_ENABLE_OUTPUT)) {
-			dmabuf->count += val;
-			dmabuf->swptr = (dmabuf->swptr + val) % dmabuf->dmasize;
-			__i810_update_lvi(state, 0);
-		}
-		spin_unlock_irqrestore(&state->card->lock, flags);
-#if defined(DEBUG) || defined(DEBUG_MMAP)
-		printk("SNDCTL_DSP_GETOPTR %d, %d, %d, %d\n", cinfo.bytes,
-			cinfo.blocks, cinfo.ptr, dmabuf->count);
-#endif
-		return copy_to_user(argp, &cinfo, sizeof(cinfo)) ? -EFAULT : 0;
-
-	case SNDCTL_DSP_GETISPACE:
-		if (!(file->f_mode & FMODE_READ))
-			return -EINVAL;
-		if (!dmabuf->ready && (val = prog_dmabuf(state, 1)) != 0)
-			return val;
-		spin_lock_irqsave(&state->card->lock, flags);
-		abinfo.bytes = i810_get_available_read_data(state);
-		abinfo.fragsize = dmabuf->userfragsize;
-		abinfo.fragstotal = dmabuf->userfrags;
-		abinfo.fragments = abinfo.bytes / dmabuf->userfragsize;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-#if defined(DEBUG) || defined(DEBUG_MMAP)
-		printk("SNDCTL_DSP_GETISPACE %d, %d, %d, %d\n", abinfo.bytes,
-			abinfo.fragsize, abinfo.fragments, abinfo.fragstotal);
-#endif
-		return copy_to_user(argp, &abinfo, sizeof(abinfo)) ? -EFAULT : 0;
-
-	case SNDCTL_DSP_GETIPTR:
-		if (!(file->f_mode & FMODE_READ))
-			return -EINVAL;
-		if (!dmabuf->ready && (val = prog_dmabuf(state, 0)) != 0)
-			return val;
-		spin_lock_irqsave(&state->card->lock, flags);
-		val = i810_get_available_read_data(state);
-		cinfo.bytes = dmabuf->total_bytes;
-		cinfo.blocks = val/dmabuf->userfragsize;
-		cinfo.ptr = dmabuf->hwptr;
-		if (dmabuf->mapped && (dmabuf->trigger & PCM_ENABLE_INPUT)) {
-			dmabuf->count -= val;
-			dmabuf->swptr = (dmabuf->swptr + val) % dmabuf->dmasize;
-			__i810_update_lvi(state, 1);
-		}
-		spin_unlock_irqrestore(&state->card->lock, flags);
-#if defined(DEBUG) || defined(DEBUG_MMAP)
-		printk("SNDCTL_DSP_GETIPTR %d, %d, %d, %d\n", cinfo.bytes,
-			cinfo.blocks, cinfo.ptr, dmabuf->count);
-#endif
-		return copy_to_user(argp, &cinfo, sizeof(cinfo)) ? -EFAULT : 0;
-
-	case SNDCTL_DSP_NONBLOCK:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_NONBLOCK\n");
-#endif
-		file->f_flags |= O_NONBLOCK;
-		return 0;
-
-	case SNDCTL_DSP_GETCAPS:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_GETCAPS\n");
-#endif
-	    return put_user(DSP_CAP_REALTIME|DSP_CAP_TRIGGER|DSP_CAP_MMAP|DSP_CAP_BIND,
-			    p);
-
-	case SNDCTL_DSP_GETTRIGGER:
-		val = 0;
-#ifdef DEBUG
-		printk("SNDCTL_DSP_GETTRIGGER 0x%x\n", dmabuf->trigger);
-#endif
-		return put_user(dmabuf->trigger, p);
-
-	case SNDCTL_DSP_SETTRIGGER:
-		if (get_user(val, p))
-			return -EFAULT;
-#if defined(DEBUG) || defined(DEBUG_MMAP)
-		printk("SNDCTL_DSP_SETTRIGGER 0x%x\n", val);
-#endif
-		/* silently ignore invalid PCM_ENABLE_xxx bits,
-		 * like the other drivers do
-		 */
-		if (!(file->f_mode & FMODE_READ ))
-			val &= ~PCM_ENABLE_INPUT;
-		if (!(file->f_mode & FMODE_WRITE ))
-			val &= ~PCM_ENABLE_OUTPUT;
-		if((file->f_mode & FMODE_READ) && !(val & PCM_ENABLE_INPUT) && dmabuf->enable == ADC_RUNNING) {
-			stop_adc(state);
-		}
-		if((file->f_mode & FMODE_WRITE) && !(val & PCM_ENABLE_OUTPUT) && dmabuf->enable == DAC_RUNNING) {
-			stop_dac(state);
-		}
-		dmabuf->trigger = val;
-		if((val & PCM_ENABLE_OUTPUT) && !(dmabuf->enable & DAC_RUNNING)) {
-			if (!dmabuf->write_channel) {
-				dmabuf->ready = 0;
-				dmabuf->write_channel = state->card->alloc_pcm_channel(state->card);
-				if (!dmabuf->write_channel)
-					return -EBUSY;
-			}
-			if (!dmabuf->ready && (ret = prog_dmabuf(state, 0)))
-				return ret;
-			if (dmabuf->mapped) {
-				spin_lock_irqsave(&state->card->lock, flags);
-				i810_update_ptr(state);
-				dmabuf->count = 0;
-				dmabuf->swptr = dmabuf->hwptr;
-				dmabuf->count = i810_get_free_write_space(state);
-				dmabuf->swptr = (dmabuf->swptr + dmabuf->count) % dmabuf->dmasize;
-				spin_unlock_irqrestore(&state->card->lock, flags);
-			}
-			i810_update_lvi(state, 0);
-			start_dac(state);
-		}
-		if((val & PCM_ENABLE_INPUT) && !(dmabuf->enable & ADC_RUNNING)) {
-			if (!dmabuf->read_channel) {
-				dmabuf->ready = 0;
-				dmabuf->read_channel = state->card->alloc_rec_pcm_channel(state->card);
-				if (!dmabuf->read_channel)
-					return -EBUSY;
-			}
-			if (!dmabuf->ready && (ret = prog_dmabuf(state, 1)))
-				return ret;
-			if (dmabuf->mapped) {
-				spin_lock_irqsave(&state->card->lock, flags);
-				i810_update_ptr(state);
-				dmabuf->swptr = dmabuf->hwptr;
-				dmabuf->count = 0;
-				spin_unlock_irqrestore(&state->card->lock, flags);
-			}
-			i810_update_lvi(state, 1);
-			start_adc(state);
-		}
-		return 0;
-
-	case SNDCTL_DSP_SETDUPLEX:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_SETDUPLEX\n");
-#endif
-		return -EINVAL;
-
-	case SNDCTL_DSP_GETODELAY:
-		if (!(file->f_mode & FMODE_WRITE))
-			return -EINVAL;
-		spin_lock_irqsave(&state->card->lock, flags);
-		i810_update_ptr(state);
-		val = dmabuf->count;
-		spin_unlock_irqrestore(&state->card->lock, flags);
-#ifdef DEBUG
-		printk("SNDCTL_DSP_GETODELAY %d\n", dmabuf->count);
-#endif
-		return put_user(val, p);
-
-	case SOUND_PCM_READ_RATE:
-#ifdef DEBUG
-		printk("SOUND_PCM_READ_RATE %d\n", dmabuf->rate);
-#endif
-		return put_user(dmabuf->rate, p);
-
-	case SOUND_PCM_READ_CHANNELS:
-#ifdef DEBUG
-		printk("SOUND_PCM_READ_CHANNELS\n");
-#endif
-		return put_user(2, p);
-
-	case SOUND_PCM_READ_BITS:
-#ifdef DEBUG
-		printk("SOUND_PCM_READ_BITS\n");
-#endif
-		return put_user(AFMT_S16_LE, p);
-
-	case SNDCTL_DSP_SETSPDIF: /* Set S/PDIF Control register */
-#ifdef DEBUG
-		printk("SNDCTL_DSP_SETSPDIF\n");
-#endif
-		if (get_user(val, p))
-			return -EFAULT;
-
-		/* Check to make sure the codec supports S/PDIF transmitter */
-
-		if((state->card->ac97_features & 4)) {
-			/* mask out the transmitter speed bits so the user can't set them */
-			val &= ~0x3000;
-
-			/* Add the current transmitter speed bits to the passed value */
-			ret = i810_ac97_get(codec, AC97_SPDIF_CONTROL);
-			val |= (ret & 0x3000);
-
-			i810_ac97_set(codec, AC97_SPDIF_CONTROL, val);
-			if(i810_ac97_get(codec, AC97_SPDIF_CONTROL) != val ) {
-				printk(KERN_ERR "i810_audio: Unable to set S/PDIF configuration to 0x%04x.\n", val);
-				return -EFAULT;
-			}
-		}
-#ifdef DEBUG
-		else 
-			printk(KERN_WARNING "i810_audio: S/PDIF transmitter not avalible.\n");
-#endif
-		return put_user(val, p);
-
-	case SNDCTL_DSP_GETSPDIF: /* Get S/PDIF Control register */
-#ifdef DEBUG
-		printk("SNDCTL_DSP_GETSPDIF\n");
-#endif
-		if (get_user(val, p))
-			return -EFAULT;
-
-		/* Check to make sure the codec supports S/PDIF transmitter */
-
-		if(!(state->card->ac97_features & 4)) {
-#ifdef DEBUG
-			printk(KERN_WARNING "i810_audio: S/PDIF transmitter not avalible.\n");
-#endif
-			val = 0;
-		} else {
-			val = i810_ac97_get(codec, AC97_SPDIF_CONTROL);
-		}
-		//return put_user((val & 0xcfff), p);
-		return put_user(val, p);
-   			
-	case SNDCTL_DSP_GETCHANNELMASK:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_GETCHANNELMASK\n");
-#endif
-		if (get_user(val, p))
-			return -EFAULT;
-		
-		/* Based on AC'97 DAC support, not ICH hardware */
-		val = DSP_BIND_FRONT;
-		if ( state->card->ac97_features & 0x0004 )
-			val |= DSP_BIND_SPDIF;
-
-		if ( state->card->ac97_features & 0x0080 )
-			val |= DSP_BIND_SURR;
-		if ( state->card->ac97_features & 0x0140 )
-			val |= DSP_BIND_CENTER_LFE;
-
-		return put_user(val, p);
-
-	case SNDCTL_DSP_BIND_CHANNEL:
-#ifdef DEBUG
-		printk("SNDCTL_DSP_BIND_CHANNEL\n");
-#endif
-		if (get_user(val, p))
-			return -EFAULT;
-		if ( val == DSP_BIND_QUERY ) {
-			val = DSP_BIND_FRONT; /* Always report this as being enabled */
-			if ( state->card->ac97_status & SPDIF_ON ) 
-				val |= DSP_BIND_SPDIF;
-			else {
-				if ( state->card->ac97_status & SURR_ON )
-					val |= DSP_BIND_SURR;
-				if ( state->card->ac97_status & CENTER_LFE_ON )
-					val |= DSP_BIND_CENTER_LFE;
-			}
-		} else {  /* Not a query, set it */
-			if (!(file->f_mode & FMODE_WRITE))
-				return -EINVAL;
-			if ( dmabuf->enable == DAC_RUNNING ) {
-				stop_dac(state);
-			}
-			if ( val & DSP_BIND_SPDIF ) {  /* Turn on SPDIF */
-				/*  Ok, this should probably define what slots
-				 *  to use. For now, we'll only set it to the
-				 *  defaults:
-				 * 
-				 *   non multichannel codec maps to slots 3&4
-				 *   2 channel codec maps to slots 7&8
-				 *   4 channel codec maps to slots 6&9
-				 *   6 channel codec maps to slots 10&11
-				 *
-				 *  there should be some way for the app to
-				 *  select the slot assignment.
-				 */
-	
-				i810_set_spdif_output ( state, AC97_EA_SPSA_3_4, dmabuf->rate );
-				if ( !(state->card->ac97_status & SPDIF_ON) )
-					val &= ~DSP_BIND_SPDIF;
-			} else {
-				int mask;
-				int channels;
-
-				/* Turn off S/PDIF if it was on */
-				if ( state->card->ac97_status & SPDIF_ON ) 
-					i810_set_spdif_output ( state, -1, 0 );
-				
-				mask = val & (DSP_BIND_FRONT | DSP_BIND_SURR | DSP_BIND_CENTER_LFE);
-				switch (mask) {
-					case DSP_BIND_FRONT:
-						channels = 2;
-						break;
-					case DSP_BIND_FRONT|DSP_BIND_SURR:
-						channels = 4;
-						break;
-					case DSP_BIND_FRONT|DSP_BIND_SURR|DSP_BIND_CENTER_LFE:
-						channels = 6;
-						break;
-					default:
-						val = DSP_BIND_FRONT;
-						channels = 2;
-						break;
-				}
-				i810_set_dac_channels ( state, channels );
-
-				/* check that they really got turned on */
-				if (!(state->card->ac97_status & SURR_ON))
-					val &= ~DSP_BIND_SURR;
-				if (!(state->card->ac97_status & CENTER_LFE_ON))
-					val &= ~DSP_BIND_CENTER_LFE;
-			}
-		}
-		return put_user(val, p);
-		
-	case SNDCTL_DSP_MAPINBUF:
-	case SNDCTL_DSP_MAPOUTBUF:
-	case SNDCTL_DSP_SETSYNCRO:
-	case SOUND_PCM_WRITE_FILTER:
-	case SOUND_PCM_READ_FILTER:
-#ifdef DEBUG
-		printk("SNDCTL_* -EINVAL\n");
-#endif
-		return -EINVAL;
-	}
-	return -EINVAL;
-}
-
-static int i810_open(struct inode *inode, struct file *file)
-{
-	int i = 0;
-	struct i810_card *card = devs;
-	struct i810_state *state = NULL;
-	struct dmabuf *dmabuf = NULL;
-
-	/* find an avaiable virtual channel (instance of /dev/dsp) */
-	while (card != NULL) {
-		/*
-		 * If we are initializing and then fail, card could go
-		 * away unuexpectedly while we are in the for() loop.
-		 * So, check for card on each iteration before we check
-		 * for card->initializing to avoid a possible oops.
-		 * This usually only matters for times when the driver is
-		 * autoloaded by kmod.
-		 */
-		for (i = 0; i < 50 && card && card->initializing; i++) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ/20);
-		}
-		for (i = 0; i < NR_HW_CH && card && !card->initializing; i++) {
-			if (card->states[i] == NULL) {
-				state = card->states[i] = (struct i810_state *)
-					kzalloc(sizeof(struct i810_state), GFP_KERNEL);
-				if (state == NULL)
-					return -ENOMEM;
-				dmabuf = &state->dmabuf;
-				goto found_virt;
-			}
-		}
-		card = card->next;
-	}
-	/* no more virtual channel avaiable */
-	if (!state)
-		return -ENODEV;
-
-found_virt:
-	/* initialize the virtual channel */
-	state->virt = i;
-	state->card = card;
-	state->magic = I810_STATE_MAGIC;
-	init_waitqueue_head(&dmabuf->wait);
-	mutex_init(&state->open_mutex);
-	file->private_data = state;
-	dmabuf->trigger = 0;
-
-	/* allocate hardware channels */
-	if(file->f_mode & FMODE_READ) {
-		if((dmabuf->read_channel = card->alloc_rec_pcm_channel(card)) == NULL) {
-			kfree (card->states[i]);
-			card->states[i] = NULL;
-			return -EBUSY;
-		}
-		dmabuf->trigger |= PCM_ENABLE_INPUT;
-		i810_set_adc_rate(state, 8000);
-	}
-	if(file->f_mode & FMODE_WRITE) {
-		if((dmabuf->write_channel = card->alloc_pcm_channel(card)) == NULL) {
-			/* make sure we free the record channel allocated above */
-			if(file->f_mode & FMODE_READ)
-				card->free_pcm_channel(card,dmabuf->read_channel->num);
-			kfree (card->states[i]);
-			card->states[i] = NULL;
-			return -EBUSY;
-		}
-		/* Initialize to 8kHz?  What if we don't support 8kHz? */
-		/*  Let's change this to check for S/PDIF stuff */
-	
-		dmabuf->trigger |= PCM_ENABLE_OUTPUT;
-		if ( spdif_locked ) {
-			i810_set_dac_rate(state, spdif_locked);
-			i810_set_spdif_output(state, AC97_EA_SPSA_3_4, spdif_locked);
-		} else {
-			i810_set_dac_rate(state, 8000);
-			/* Put the ACLink in 2 channel mode by default */
-			i = I810_IOREADL(card, GLOB_CNT);
-			I810_IOWRITEL(i & 0xffcfffff, card, GLOB_CNT);
-		}
-	}
-		
-	/* set default sample format. According to OSS Programmer's Guide  /dev/dsp
-	   should be default to unsigned 8-bits, mono, with sample rate 8kHz and
-	   /dev/dspW will accept 16-bits sample, but we don't support those so we
-	   set it immediately to stereo and 16bit, which is all we do support */
-	dmabuf->fmt |= I810_FMT_16BIT | I810_FMT_STEREO;
-	dmabuf->ossfragsize = 0;
-	dmabuf->ossmaxfrags  = 0;
-	dmabuf->subdivision  = 0;
-
-	state->open_mode |= file->f_mode & (FMODE_READ | FMODE_WRITE);
-
-	return nonseekable_open(inode, file);
-}
-
-static int i810_release(struct inode *inode, struct file *file)
-{
-	struct i810_state *state = (struct i810_state *)file->private_data;
-	struct i810_card *card = state->card;
-	struct dmabuf *dmabuf = &state->dmabuf;
-	unsigned long flags;
-
-	lock_kernel();
-
-	/* stop DMA state machine and free DMA buffers/channels */
-	if(dmabuf->trigger & PCM_ENABLE_OUTPUT) {
-		drain_dac(state, 0);
-	}
-	if(dmabuf->trigger & PCM_ENABLE_INPUT) {
-		stop_adc(state);
-	}
-	spin_lock_irqsave(&card->lock, flags);
-	dealloc_dmabuf(state);
-	if (file->f_mode & FMODE_WRITE) {
-		state->card->free_pcm_channel(state->card, dmabuf->write_channel->num);
-	}
-	if (file->f_mode & FMODE_READ) {
-		state->card->free_pcm_channel(state->card, dmabuf->read_channel->num);
-	}
-
-	state->card->states[state->virt] = NULL;
-	kfree(state);
-	spin_unlock_irqrestore(&card->lock, flags);
-	unlock_kernel();
-
-	return 0;
-}
-
-static /*const*/ struct file_operations i810_audio_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.read		= i810_read,
-	.write		= i810_write,
-	.poll		= i810_poll,
-	.ioctl		= i810_ioctl,
-	.mmap		= i810_mmap,
-	.open		= i810_open,
-	.release	= i810_release,
-};
-
-/* Write AC97 codec registers */
-
-static u16 i810_ac97_get_mmio(struct ac97_codec *dev, u8 reg)
-{
-	struct i810_card *card = dev->private_data;
-	int count = 100;
-	u16 reg_set = IO_REG_OFF(dev) | (reg&0x7f);
-	
-	while(count-- && (readb(card->iobase_mmio + CAS) & 1)) 
-		udelay(1);
-	
-#ifdef DEBUG_MMIO
-	{
-		u16 ans = readw(card->ac97base_mmio + reg_set);
-		printk(KERN_DEBUG "i810_audio: ac97_get_mmio(%d) -> 0x%04X\n", ((int) reg_set) & 0xffff, (u32) ans);
-		return ans;
-	}
-#else
-	return readw(card->ac97base_mmio + reg_set);
-#endif
-}
-
-static u16 i810_ac97_get_io(struct ac97_codec *dev, u8 reg)
-{
-	struct i810_card *card = dev->private_data;
-	int count = 100;
-	u16 reg_set = IO_REG_OFF(dev) | (reg&0x7f);
-	
-	while(count-- && (I810_IOREADB(card, CAS) & 1)) 
-		udelay(1);
-	
-	return inw(card->ac97base + reg_set);
-}
-
-static void i810_ac97_set_mmio(struct ac97_codec *dev, u8 reg, u16 data)
-{
-	struct i810_card *card = dev->private_data;
-	int count = 100;
-	u16 reg_set = IO_REG_OFF(dev) | (reg&0x7f);
-	
-	while(count-- && (readb(card->iobase_mmio + CAS) & 1)) 
-		udelay(1);
-	
-	writew(data, card->ac97base_mmio + reg_set);
-
-#ifdef DEBUG_MMIO
-	printk(KERN_DEBUG "i810_audio: ac97_set_mmio(0x%04X, %d)\n", (u32) data, ((int) reg_set) & 0xffff);
-#endif
-}
-
-static void i810_ac97_set_io(struct ac97_codec *dev, u8 reg, u16 data)
-{
-	struct i810_card *card = dev->private_data;
-	int count = 100;
-	u16 reg_set = IO_REG_OFF(dev) | (reg&0x7f);
-	
-	while(count-- && (I810_IOREADB(card, CAS) & 1)) 
-		udelay(1);
-	
-        outw(data, card->ac97base + reg_set);
-}
-
-static u16 i810_ac97_get(struct ac97_codec *dev, u8 reg)
-{
-	struct i810_card *card = dev->private_data;
-	u16 ret;
-	
-	spin_lock(&card->ac97_lock);
-	if (card->use_mmio) {
-		ret = i810_ac97_get_mmio(dev, reg);
-	}
-	else {
-		ret = i810_ac97_get_io(dev, reg);
-	}
-	spin_unlock(&card->ac97_lock);
-	
-	return ret;
-}
-
-static void i810_ac97_set(struct ac97_codec *dev, u8 reg, u16 data)
-{
-	struct i810_card *card = dev->private_data;
-	
-	spin_lock(&card->ac97_lock);
-	if (card->use_mmio) {
-		i810_ac97_set_mmio(dev, reg, data);
-	}
-	else {
-		i810_ac97_set_io(dev, reg, data);
-	}
-	spin_unlock(&card->ac97_lock);
-}
-
-
-/* OSS /dev/mixer file operation methods */
-
-static int i810_open_mixdev(struct inode *inode, struct file *file)
-{
-	int i;
-	int minor = iminor(inode);
-	struct i810_card *card = devs;
-
-	for (card = devs; card != NULL; card = card->next) {
-		/*
-		 * If we are initializing and then fail, card could go
-		 * away unuexpectedly while we are in the for() loop.
-		 * So, check for card on each iteration before we check
-		 * for card->initializing to avoid a possible oops.
-		 * This usually only matters for times when the driver is
-		 * autoloaded by kmod.
-		 */
-		for (i = 0; i < 50 && card && card->initializing; i++) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(HZ/20);
-		}
-		for (i = 0; i < NR_AC97 && card && !card->initializing; i++) 
-			if (card->ac97_codec[i] != NULL &&
-			    card->ac97_codec[i]->dev_mixer == minor) {
-				file->private_data = card->ac97_codec[i];
-				return nonseekable_open(inode, file);
-			}
-	}
-	return -ENODEV;
-}
-
-static int i810_ioctl_mixdev(struct inode *inode, struct file *file, unsigned int cmd,
-				unsigned long arg)
-{
-	struct ac97_codec *codec = (struct ac97_codec *)file->private_data;
-
-	return codec->mixer_ioctl(codec, cmd, arg);
-}
-
-static /*const*/ struct file_operations i810_mixer_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.ioctl		= i810_ioctl_mixdev,
-	.open		= i810_open_mixdev,
-};
-
-/* AC97 codec initialisation.  These small functions exist so we don't
-   duplicate code between module init and apm resume */
-
-static inline int i810_ac97_exists(struct i810_card *card, int ac97_number)
-{
-	u32 reg = I810_IOREADL(card, GLOB_STA);
-	switch (ac97_number) {
-	case 0:
-		return reg & (1<<8);
-	case 1: 
-		return reg & (1<<9);
-	case 2:
-		return reg & (1<<28);
-	}
-	return 0;
-}
-
-static inline int i810_ac97_enable_variable_rate(struct ac97_codec *codec)
-{
-	i810_ac97_set(codec, AC97_EXTENDED_STATUS, 9);
-	i810_ac97_set(codec,AC97_EXTENDED_STATUS,
-		      i810_ac97_get(codec, AC97_EXTENDED_STATUS)|0xE800);
-	
-	return (i810_ac97_get(codec, AC97_EXTENDED_STATUS)&1);
-}
-
-
-static int i810_ac97_probe_and_powerup(struct i810_card *card,struct ac97_codec *codec)
-{
-	/* Returns 0 on failure */
-	int i;
-
-	if (ac97_probe_codec(codec) == 0) return 0;
-	
-	/* power it all up */
-	i810_ac97_set(codec, AC97_POWER_CONTROL,
-		      i810_ac97_get(codec, AC97_POWER_CONTROL) & ~0x7f00);
-
-	/* wait for analog ready */
-	for (i=100; i && ((i810_ac97_get(codec, AC97_POWER_CONTROL) & 0xf) != 0xf); i--)
-	{
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ/20);
-	} 
-	return i;
-}
-
-static int is_new_ich(u16 pci_id)
-{
-	switch (pci_id) {
-	case PCI_DEVICE_ID_INTEL_82801DB_5:
-	case PCI_DEVICE_ID_INTEL_82801EB_5:
-	case PCI_DEVICE_ID_INTEL_ESB_5:
-	case PCI_DEVICE_ID_INTEL_ICH6_18:
-		return 1;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-static inline int ich_use_mmio(struct i810_card *card)
-{
-	return is_new_ich(card->pci_id) && card->use_mmio;
-}
-
-/**
- *	i810_ac97_power_up_bus	-	bring up AC97 link
- *	@card : ICH audio device to power up
- *
- *	Bring up the ACLink AC97 codec bus
- */
- 
-static int i810_ac97_power_up_bus(struct i810_card *card)
-{	
-	u32 reg = I810_IOREADL(card, GLOB_CNT);
-	int i;
-	int primary_codec_id = 0;
-
-	if((reg&2)==0)	/* Cold required */
-		reg|=2;
-	else
-		reg|=4;	/* Warm */
-		
-	reg&=~8;	/* ACLink on */
-	
-	/* At this point we deassert AC_RESET # */
-	I810_IOWRITEL(reg , card, GLOB_CNT);
-
-	/* We must now allow time for the Codec initialisation.
-	   600mS is the specified time */
-	   	
-	for(i=0;i<10;i++)
-	{
-		if((I810_IOREADL(card, GLOB_CNT)&4)==0)
-			break;
-
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ/20);
-	}
-	if(i==10)
-	{
-		printk(KERN_ERR "i810_audio: AC'97 reset failed.\n");
-		return 0;
-	}
-
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule_timeout(HZ/2);
-
-	/*
-	 *	See if the primary codec comes ready. This must happen
-	 *	before we start doing DMA stuff
-	 */	
-	/* see i810_ac97_init for the next 10 lines (jsaw) */
-	if (card->use_mmio)
-		readw(card->ac97base_mmio);
-	else
-		inw(card->ac97base);
-	if (ich_use_mmio(card)) {
-		primary_codec_id = (int) readl(card->iobase_mmio + SDM) & 0x3;
-		printk(KERN_INFO "i810_audio: Primary codec has ID %d\n",
-		       primary_codec_id);
-	}
-
-	if(! i810_ac97_exists(card, primary_codec_id))
-	{
-		printk(KERN_INFO "i810_audio: Codec not ready.. wait.. ");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);	/* actually 600mS by the spec */
-
-		if(i810_ac97_exists(card, primary_codec_id))
-			printk("OK\n");
-		else 
-			printk("no response.\n");
-	}
-	if (card->use_mmio)
-		readw(card->ac97base_mmio);
-	else
-		inw(card->ac97base);
-	return 1;
-}
-
-static int __devinit i810_ac97_init(struct i810_card *card)
-{
-	int num_ac97 = 0;
-	int ac97_id;
-	int total_channels = 0;
-	int nr_ac97_max = card_cap[card->pci_id_internal].nr_ac97;
-	struct ac97_codec *codec;
-	u16 eid;
-	u32 reg;
-
-	if(!i810_ac97_power_up_bus(card)) return 0;
-
-	/* Number of channels supported */
-	/* What about the codec?  Just because the ICH supports */
-	/* multiple channels doesn't mean the codec does.       */
-	/* we'll have to modify this in the codec section below */
-	/* to reflect what the codec has.                       */
-	/* ICH and ICH0 only support 2 channels so don't bother */
-	/* to check....                                         */
-
-	card->channels = 2;
-	reg = I810_IOREADL(card, GLOB_STA);
-	if ( reg & 0x0200000 )
-		card->channels = 6;
-	else if ( reg & 0x0100000 )
-		card->channels = 4;
-	printk(KERN_INFO "i810_audio: Audio Controller supports %d channels.\n", card->channels);
-	printk(KERN_INFO "i810_audio: Defaulting to base 2 channel mode.\n");
-	reg = I810_IOREADL(card, GLOB_CNT);
-	I810_IOWRITEL(reg & 0xffcfffff, card, GLOB_CNT);
-		
-	for (num_ac97 = 0; num_ac97 < NR_AC97; num_ac97++) 
-		card->ac97_codec[num_ac97] = NULL;
-
-	/*@FIXME I don't know, if I'm playing to safe here... (jsaw) */
-	if ((nr_ac97_max > 2) && !card->use_mmio) nr_ac97_max = 2;
-
-	for (num_ac97 = 0; num_ac97 < nr_ac97_max; num_ac97++) {
-		/* codec reset */
-		printk(KERN_INFO "i810_audio: Resetting connection %d\n", num_ac97);
-		if (card->use_mmio)
-			readw(card->ac97base_mmio + 0x80*num_ac97);
-		else
-			inw(card->ac97base + 0x80*num_ac97);
-
-		/* If we have the SDATA_IN Map Register, as on ICH4, we
-		   do not loop thru all possible codec IDs but thru all 
-		   possible IO channels. Bit 0:1 of SDM then holds the 
-		   last codec ID spoken to. 
-		*/
-		if (ich_use_mmio(card)) {
-			ac97_id = (int) readl(card->iobase_mmio + SDM) & 0x3;
-			printk(KERN_INFO "i810_audio: Connection %d with codec id %d\n",
-			       num_ac97, ac97_id);
-		}
-		else {
-			ac97_id = num_ac97;
-		}
-
-		/* The ICH programmer's reference says you should   */
-		/* check the ready status before probing. So we chk */
-		/*   What do we do if it's not ready?  Wait and try */
-		/*   again, or abort?                               */
-		if (!i810_ac97_exists(card, ac97_id)) {
-			if(num_ac97 == 0)
-				printk(KERN_ERR "i810_audio: Primary codec not ready.\n");
-		}
-		
-		if ((codec = ac97_alloc_codec()) == NULL)
-			return -ENOMEM;
-
-		/* initialize some basic codec information, other fields will be filled
-		   in ac97_probe_codec */
-		codec->private_data = card;
-		codec->id = ac97_id;
-		card->ac97_id_map[ac97_id] = num_ac97 * 0x80;
-
-		if (card->use_mmio) {	
-			codec->codec_read = i810_ac97_get_mmio;
-			codec->codec_write = i810_ac97_set_mmio;
-		}
-		else {
-			codec->codec_read = i810_ac97_get_io;
-			codec->codec_write = i810_ac97_set_io;
-		}
-	
-		if(!i810_ac97_probe_and_powerup(card,codec)) {
-			printk(KERN_ERR "i810_audio: timed out waiting for codec %d analog ready.\n", ac97_id);
-			ac97_release_codec(codec);
-			break;	/* it didn't work */
-		}
-		/* Store state information about S/PDIF transmitter */
-		card->ac97_status = 0;
-		
-		/* Don't attempt to get eid until powerup is complete */
-		eid = i810_ac97_get(codec, AC97_EXTENDED_ID);
-
-		if(eid==0xFFFF)
-		{
-			printk(KERN_WARNING "i810_audio: no codec attached ?\n");
-			ac97_release_codec(codec);
-			break;
-		}
-		
-		/* Check for an AC97 1.0 soft modem (ID1) */
-		
-		if(codec->modem)
-		{
-			printk(KERN_WARNING "i810_audio: codec %d is a softmodem - skipping.\n", ac97_id);
-			ac97_release_codec(codec);
-			continue;
-		}
-		
-		card->ac97_features = eid;
-
-		/* Now check the codec for useful features to make up for
-		   the dumbness of the 810 hardware engine */
-
-		if(!(eid&0x0001))
-			printk(KERN_WARNING "i810_audio: only 48Khz playback available.\n");
-		else
-		{
-			if(!i810_ac97_enable_variable_rate(codec)) {
-				printk(KERN_WARNING "i810_audio: Codec refused to allow VRA, using 48Khz only.\n");
-				card->ac97_features&=~1;
-			}			
-		}
-   		
-		/* Turn on the amplifier */
-
-		codec->codec_write(codec, AC97_POWER_CONTROL, 
-			 codec->codec_read(codec, AC97_POWER_CONTROL) & ~0x8000);
-				
-		/* Determine how many channels the codec(s) support   */
-		/*   - The primary codec always supports 2            */
-		/*   - If the codec supports AMAP, surround DACs will */
-		/*     automaticlly get assigned to slots.            */
-		/*     * Check for surround DACs and increment if     */
-		/*       found.                                       */
-		/*   - Else check if the codec is revision 2.2        */
-		/*     * If surround DACs exist, assign them to slots */
-		/*       and increment channel count.                 */
-
-		/* All of this only applies to ICH2 and above. ICH    */
-		/* and ICH0 only support 2 channels.  ICH2 will only  */
-		/* support multiple codecs in a "split audio" config. */
-		/* as described above.                                */
-
-		/* TODO: Remove all the debugging messages!           */
-
-		if((eid & 0xc000) == 0) /* primary codec */
-			total_channels += 2; 
-
-		if(eid & 0x200) { /* GOOD, AMAP support */
-			if (eid & 0x0080) /* L/R Surround channels */
-				total_channels += 2;
-			if (eid & 0x0140) /* LFE and Center channels */
-				total_channels += 2;
-			printk("i810_audio: AC'97 codec %d supports AMAP, total channels = %d\n", ac97_id, total_channels);
-		} else if (eid & 0x0400) {  /* this only works on 2.2 compliant codecs */
-			eid &= 0xffcf;
-			if((eid & 0xc000) != 0)	{
-				switch ( total_channels ) {
-					case 2:
-						/* Set dsa1, dsa0 to 01 */
-						eid |= 0x0010;
-						break;
-					case 4:
-						/* Set dsa1, dsa0 to 10 */
-						eid |= 0x0020;
-						break;
-					case 6:
-						/* Set dsa1, dsa0 to 11 */
-						eid |= 0x0030;
-						break;
-				}
-				total_channels += 2;
-			}
-			i810_ac97_set(codec, AC97_EXTENDED_ID, eid);
-			eid = i810_ac97_get(codec, AC97_EXTENDED_ID);
-			printk("i810_audio: AC'97 codec %d, new EID value = 0x%04x\n", ac97_id, eid);
-			if (eid & 0x0080) /* L/R Surround channels */
-				total_channels += 2;
-			if (eid & 0x0140) /* LFE and Center channels */
-				total_channels += 2;
-			printk("i810_audio: AC'97 codec %d, DAC map configured, total channels = %d\n", ac97_id, total_channels);
-		} else {
-			printk("i810_audio: AC'97 codec %d Unable to map surround DAC's (or DAC's not present), total channels = %d\n", ac97_id, total_channels);
-		}
-
-		if ((codec->dev_mixer = register_sound_mixer(&i810_mixer_fops, -1)) < 0) {
-			printk(KERN_ERR "i810_audio: couldn't register mixer!\n");
-			ac97_release_codec(codec);
-			break;
-		}
-
-		card->ac97_codec[num_ac97] = codec;
-	}
-
-	/* tune up the primary codec */
-	ac97_tune_hardware(card->pci_dev, ac97_quirks, ac97_quirk);
-
-	/* pick the minimum of channels supported by ICHx or codec(s) */
-	card->channels = (card->channels > total_channels)?total_channels:card->channels;
-
-	return num_ac97;
-}
-
-static void __devinit i810_configure_clocking (void)
-{
-	struct i810_card *card;
-	struct i810_state *state;
-	struct dmabuf *dmabuf;
-	unsigned int i, offset, new_offset;
-	unsigned long flags;
-
-	card = devs;
-	/* We could try to set the clocking for multiple cards, but can you even have
-	 * more than one i810 in a machine?  Besides, clocking is global, so unless
-	 * someone actually thinks more than one i810 in a machine is possible and
-	 * decides to rewrite that little bit, setting the rate for more than one card
-	 * is a waste of time.
-	 */
-	if(card != NULL) {
-		state = card->states[0] = (struct i810_state *)
-					kzalloc(sizeof(struct i810_state), GFP_KERNEL);
-		if (state == NULL)
-			return;
-		dmabuf = &state->dmabuf;
-
-		dmabuf->write_channel = card->alloc_pcm_channel(card);
-		state->virt = 0;
-		state->card = card;
-		state->magic = I810_STATE_MAGIC;
-		init_waitqueue_head(&dmabuf->wait);
-		mutex_init(&state->open_mutex);
-		dmabuf->fmt = I810_FMT_STEREO | I810_FMT_16BIT;
-		dmabuf->trigger = PCM_ENABLE_OUTPUT;
-		i810_set_spdif_output(state, -1, 0);
-		i810_set_dac_channels(state, 2);
-		i810_set_dac_rate(state, 48000);
-		if(prog_dmabuf(state, 0) != 0) {
-			goto config_out_nodmabuf;
-		}
-		if(dmabuf->dmasize < 16384) {
-			goto config_out;
-		}
-		dmabuf->count = dmabuf->dmasize;
-		CIV_TO_LVI(card, dmabuf->write_channel->port, -1);
-		local_irq_save(flags);
-		start_dac(state);
-		offset = i810_get_dma_addr(state, 0);
-		mdelay(50);
-		new_offset = i810_get_dma_addr(state, 0);
-		stop_dac(state);
-		local_irq_restore(flags);
-		i = new_offset - offset;
-#ifdef DEBUG_INTERRUPTS
-		printk("i810_audio: %d bytes in 50 milliseconds\n", i);
-#endif
-		if(i == 0)
-			goto config_out;
-		i = i / 4 * 20;
-		if (i > 48500 || i < 47500) {
-			clocking = clocking * clocking / i;
-			printk("i810_audio: setting clocking to %d\n", clocking);
-		}
-config_out:
-		dealloc_dmabuf(state);
-config_out_nodmabuf:
-		state->card->free_pcm_channel(state->card,state->dmabuf.write_channel->num);
-		kfree(state);
-		card->states[0] = NULL;
-	}
-}
-
-/* install the driver, we do not allocate hardware channel nor DMA buffer now, they are defered 
-   until "ACCESS" time (in prog_dmabuf called by open/read/write/ioctl/mmap) */
-   
-static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_id)
-{
-	struct i810_card *card;
-
-	if (pci_enable_device(pci_dev))
-		return -EIO;
-
-	if (pci_set_dma_mask(pci_dev, I810_DMA_MASK)) {
-		printk(KERN_ERR "i810_audio: architecture does not support"
-		       " 32bit PCI busmaster DMA\n");
-		return -ENODEV;
-	}
-	
-	if ((card = kzalloc(sizeof(struct i810_card), GFP_KERNEL)) == NULL) {
-		printk(KERN_ERR "i810_audio: out of memory\n");
-		return -ENOMEM;
-	}
-
-	card->initializing = 1;
-	card->pci_dev = pci_dev;
-	card->pci_id = pci_id->device;
-	card->ac97base = pci_resource_start (pci_dev, 0);
-	card->iobase = pci_resource_start (pci_dev, 1);
-
-	if (!(card->ac97base) || !(card->iobase)) {
-		card->ac97base = 0;
-		card->iobase = 0;
-	}
-
-	/* if chipset could have mmio capability, check it */ 
-	if (card_cap[pci_id->driver_data].flags & CAP_MMIO) {
-		card->ac97base_mmio_phys = pci_resource_start (pci_dev, 2);
-		card->iobase_mmio_phys = pci_resource_start (pci_dev, 3);
-
-		if ((card->ac97base_mmio_phys) && (card->iobase_mmio_phys)) {
-			card->use_mmio = 1;
-		}
-		else {
-			card->ac97base_mmio_phys = 0;
-			card->iobase_mmio_phys = 0;
-		}
-	}
-
-	if (!(card->use_mmio) && (!(card->iobase) || !(card->ac97base))) {
-		printk(KERN_ERR "i810_audio: No I/O resources available.\n");
-		goto out_mem;
-	}
-
-	card->irq = pci_dev->irq;
-	card->next = devs;
-	card->magic = I810_CARD_MAGIC;
-#ifdef CONFIG_PM
-	card->pm_suspended=0;
-#endif
-	spin_lock_init(&card->lock);
-	spin_lock_init(&card->ac97_lock);
-	devs = card;
-
-	pci_set_master(pci_dev);
-
-	printk(KERN_INFO "i810: %s found at IO 0x%04lx and 0x%04lx, "
-	       "MEM 0x%04lx and 0x%04lx, IRQ %d\n",
-	       card_names[pci_id->driver_data], 
-	       card->iobase, card->ac97base, 
-	       card->ac97base_mmio_phys, card->iobase_mmio_phys,
-	       card->irq);
-
-	card->alloc_pcm_channel = i810_alloc_pcm_channel;
-	card->alloc_rec_pcm_channel = i810_alloc_rec_pcm_channel;
-	card->alloc_rec_mic_channel = i810_alloc_rec_mic_channel;
-	card->free_pcm_channel = i810_free_pcm_channel;
-
-	if ((card->channel = pci_alloc_consistent(pci_dev,
-	    sizeof(struct i810_channel)*NR_HW_CH, &card->chandma)) == NULL) {
-		printk(KERN_ERR "i810: cannot allocate channel DMA memory\n");
-		goto out_mem;
-	}
-
-	{ /* We may dispose of this altogether some time soon, so... */
-		struct i810_channel *cp = card->channel;
-
-		cp[0].offset = 0;
-		cp[0].port = 0x00;
-		cp[0].num = 0;
-		cp[1].offset = 0;
-		cp[1].port = 0x10;
-		cp[1].num = 1;
-		cp[2].offset = 0;
-		cp[2].port = 0x20;
-		cp[2].num = 2;
-	}
-
-	/* claim our iospace and irq */
-	if (!request_region(card->iobase, 64, card_names[pci_id->driver_data])) {
-		printk(KERN_ERR "i810_audio: unable to allocate region %lx\n", card->iobase);
-		goto out_region1;
-	}
-	if (!request_region(card->ac97base, 256, card_names[pci_id->driver_data])) {
-		printk(KERN_ERR "i810_audio: unable to allocate region %lx\n", card->ac97base);
-		goto out_region2;
-	}
-
-	if (card->use_mmio) {
-		if (request_mem_region(card->ac97base_mmio_phys, 512, "ich_audio MMBAR")) {
-			if ((card->ac97base_mmio = ioremap(card->ac97base_mmio_phys, 512))) { /*@FIXME can ioremap fail? don't know (jsaw) */
-				if (request_mem_region(card->iobase_mmio_phys, 256, "ich_audio MBBAR")) {
-					if ((card->iobase_mmio = ioremap(card->iobase_mmio_phys, 256))) {
-						printk(KERN_INFO "i810: %s mmio at 0x%04lx and 0x%04lx\n",
-						       card_names[pci_id->driver_data], 
-						       (unsigned long) card->ac97base_mmio, 
-						       (unsigned long) card->iobase_mmio); 
-					}
-					else {
-						iounmap(card->ac97base_mmio);
-						release_mem_region(card->ac97base_mmio_phys, 512);
-						release_mem_region(card->iobase_mmio_phys, 512);
-						card->use_mmio = 0;
-					}
-				}
-				else {
-					iounmap(card->ac97base_mmio);
-					release_mem_region(card->ac97base_mmio_phys, 512);
-					card->use_mmio = 0;
-				}
-			}
-		}
-		else {
-			card->use_mmio = 0;
-		}
-	}
-
-	/* initialize AC97 codec and register /dev/mixer */
-	if (i810_ac97_init(card) <= 0)
-		goto out_iospace;
-	pci_set_drvdata(pci_dev, card);
-
-	if(clocking == 0) {
-		clocking = 48000;
-		i810_configure_clocking();
-	}
-
-	/* register /dev/dsp */
-	if ((card->dev_audio = register_sound_dsp(&i810_audio_fops, -1)) < 0) {
-		int i;
-		printk(KERN_ERR "i810_audio: couldn't register DSP device!\n");
-		for (i = 0; i < NR_AC97; i++)
-		if (card->ac97_codec[i] != NULL) {
-			unregister_sound_mixer(card->ac97_codec[i]->dev_mixer);
-			ac97_release_codec(card->ac97_codec[i]);
-		}
-		goto out_iospace;
-	}
-
-	if (request_irq(card->irq, &i810_interrupt, IRQF_SHARED,
-			card_names[pci_id->driver_data], card)) {
-		printk(KERN_ERR "i810_audio: unable to allocate irq %d\n", card->irq);
-		goto out_iospace;
-	}
-
-
- 	card->initializing = 0;
-	return 0;
-
-out_iospace:
-	if (card->use_mmio) {
-		iounmap(card->ac97base_mmio);
-		iounmap(card->iobase_mmio);
-		release_mem_region(card->ac97base_mmio_phys, 512);
-		release_mem_region(card->iobase_mmio_phys, 256);
-	}
-	release_region(card->ac97base, 256);
-out_region2:
-	release_region(card->iobase, 64);
-out_region1:
-	pci_free_consistent(pci_dev, sizeof(struct i810_channel)*NR_HW_CH,
-	    card->channel, card->chandma);
-out_mem:
-	kfree(card);
-	return -ENODEV;
-}
-
-static void __devexit i810_remove(struct pci_dev *pci_dev)
-{
-	int i;
-	struct i810_card *card = pci_get_drvdata(pci_dev);
-	/* free hardware resources */
-	free_irq(card->irq, devs);
-	release_region(card->iobase, 64);
-	release_region(card->ac97base, 256);
-	pci_free_consistent(pci_dev, sizeof(struct i810_channel)*NR_HW_CH,
-			    card->channel, card->chandma);
-	if (card->use_mmio) {
-		iounmap(card->ac97base_mmio);
-		iounmap(card->iobase_mmio);
-		release_mem_region(card->ac97base_mmio_phys, 512);
-		release_mem_region(card->iobase_mmio_phys, 256);
-	}
-
-	/* unregister audio devices */
-	for (i = 0; i < NR_AC97; i++)
-		if (card->ac97_codec[i] != NULL) {
-			unregister_sound_mixer(card->ac97_codec[i]->dev_mixer);
-			ac97_release_codec(card->ac97_codec[i]);
-			card->ac97_codec[i] = NULL;
-		}
-	unregister_sound_dsp(card->dev_audio);
-	kfree(card);
-}
-
-#ifdef CONFIG_PM
-static int i810_pm_suspend(struct pci_dev *dev, pm_message_t pm_state)
-{
-        struct i810_card *card = pci_get_drvdata(dev);
-        struct i810_state *state;
-	unsigned long flags;
-	struct dmabuf *dmabuf;
-	int i,num_ac97;
-#ifdef DEBUG
-	printk("i810_audio: i810_pm_suspend called\n");
-#endif
-	if(!card) return 0;
-	spin_lock_irqsave(&card->lock, flags);
-	card->pm_suspended=1;
-	for(i=0;i<NR_HW_CH;i++) {
-		state = card->states[i];
-		if(!state) continue;
-		/* this happens only if there are open files */
-		dmabuf = &state->dmabuf;
-		if(dmabuf->enable & DAC_RUNNING ||
-		   (dmabuf->count && (dmabuf->trigger & PCM_ENABLE_OUTPUT))) {
-			state->pm_saved_dac_rate=dmabuf->rate;
-			stop_dac(state);
-		} else {
-			state->pm_saved_dac_rate=0;
-		}
-		if(dmabuf->enable & ADC_RUNNING) {
-			state->pm_saved_adc_rate=dmabuf->rate;	
-			stop_adc(state);
-		} else {
-			state->pm_saved_adc_rate=0;
-		}
-		dmabuf->ready = 0;
-		dmabuf->swptr = dmabuf->hwptr = 0;
-		dmabuf->count = dmabuf->total_bytes = 0;
-	}
-
-	spin_unlock_irqrestore(&card->lock, flags);
-
-	/* save mixer settings */
-	for (num_ac97 = 0; num_ac97 < NR_AC97; num_ac97++) {
-		struct ac97_codec *codec = card->ac97_codec[num_ac97];
-		if(!codec) continue;
-		for(i=0;i< SOUND_MIXER_NRDEVICES ;i++) {
-			if((supported_mixer(codec,i)) &&
-			   (codec->read_mixer)) {
-				card->pm_saved_mixer_settings[i][num_ac97]=
-					codec->read_mixer(codec,i);
-			}
-		}
-	}
-	pci_save_state(dev); /* XXX do we need this? */
-	pci_disable_device(dev); /* disable busmastering */
-	pci_set_power_state(dev,3); /* Zzz. */
-
-	return 0;
-}
-
-
-static int i810_pm_resume(struct pci_dev *dev)
-{
-	int num_ac97,i=0;
-	struct i810_card *card=pci_get_drvdata(dev);
-	pci_enable_device(dev);
-	pci_restore_state (dev);
-
-	/* observation of a toshiba portege 3440ct suggests that the 
-	   hardware has to be more or less completely reinitialized from
-	   scratch after an apm suspend.  Works For Me.   -dan */
-
-	i810_ac97_power_up_bus(card);
-
-	for (num_ac97 = 0; num_ac97 < NR_AC97; num_ac97++) {
-		struct ac97_codec *codec = card->ac97_codec[num_ac97];
-		/* check they haven't stolen the hardware while we were
-		   away */
-		if(!codec || !i810_ac97_exists(card,num_ac97)) {
-			if(num_ac97) continue;
-			else BUG();
-		}
-		if(!i810_ac97_probe_and_powerup(card,codec)) BUG();
-		
-		if((card->ac97_features&0x0001)) {
-			/* at probe time we found we could do variable
-			   rates, but APM suspend has made it forget
-			   its magical powers */
-			if(!i810_ac97_enable_variable_rate(codec)) BUG();
-		}
-		/* we lost our mixer settings, so restore them */
-		for(i=0;i< SOUND_MIXER_NRDEVICES ;i++) {
-			if(supported_mixer(codec,i)){
-				int val=card->
-					pm_saved_mixer_settings[i][num_ac97];
-				codec->mixer_state[i]=val;
-				codec->write_mixer(codec,i,
-						   (val  & 0xff) ,
-						   ((val >> 8)  & 0xff) );
-			}
-		}
-	}
-
-	/* we need to restore the sample rate from whatever it was */
-	for(i=0;i<NR_HW_CH;i++) {
-		struct i810_state * state=card->states[i];
-		if(state) {
-			if(state->pm_saved_adc_rate)
-				i810_set_adc_rate(state,state->pm_saved_adc_rate);
-			if(state->pm_saved_dac_rate)
-				i810_set_dac_rate(state,state->pm_saved_dac_rate);
-		}
-	}
-
-	
-        card->pm_suspended = 0;
-
-	/* any processes that were reading/writing during the suspend
-	   probably ended up here */
-	for(i=0;i<NR_HW_CH;i++) {
-		struct i810_state *state = card->states[i];
-		if(state) wake_up(&state->dmabuf.wait);
-        }
-
-	return 0;
-}	
-#endif /* CONFIG_PM */
-
-MODULE_AUTHOR("The Linux kernel team");
-MODULE_DESCRIPTION("Intel 810 audio support");
-MODULE_LICENSE("GPL");
-module_param(ftsodell, int, 0444);
-module_param(clocking, uint, 0444);
-module_param(strict_clocking, int, 0444);
-module_param(spdif_locked, int, 0444);
-
-#define I810_MODULE_NAME "i810_audio"
-
-static struct pci_driver i810_pci_driver = {
-	.name		= I810_MODULE_NAME,
-	.id_table	= i810_pci_tbl,
-	.probe		= i810_probe,
-	.remove		= __devexit_p(i810_remove),
-#ifdef CONFIG_PM
-	.suspend	= i810_pm_suspend,
-	.resume		= i810_pm_resume,
-#endif /* CONFIG_PM */
-};
-
-
-static int __init i810_init_module (void)
-{
-	int retval;
-
-	printk(KERN_INFO "Intel 810 + AC97 Audio, version "
-	       DRIVER_VERSION ", " __TIME__ " " __DATE__ "\n");
-
-	retval = pci_register_driver(&i810_pci_driver);
-	if (retval)
-		return retval;
-
-	if(ftsodell != 0) {
-		printk("i810_audio: ftsodell is now a deprecated option.\n");
-	}
-	if(spdif_locked > 0 ) {
-		if(spdif_locked == 32000 || spdif_locked == 44100 || spdif_locked == 48000) {
-			printk("i810_audio: Enabling S/PDIF at sample rate %dHz.\n", spdif_locked);
-		} else {
-			printk("i810_audio: S/PDIF can only be locked to 32000, 44100, or 48000Hz.\n");
-			spdif_locked = 0;
-		}
-	}
-	
-	return 0;
-}
-
-static void __exit i810_cleanup_module (void)
-{
-	pci_unregister_driver(&i810_pci_driver);
-}
-
-module_init(i810_init_module);
-module_exit(i810_cleanup_module);
-
-/*
-Local Variables:
-c-basic-offset: 8
-End:
-*/
diff --git a/sound/oss/via82cxxx_audio.c b/sound/oss/via82cxxx_audio.c
deleted file mode 100644
index f95aa0946758..000000000000
--- a/sound/oss/via82cxxx_audio.c
+++ /dev/null
@@ -1,3616 +0,0 @@
-/*
- * Support for VIA 82Cxxx Audio Codecs
- * Copyright 1999,2000 Jeff Garzik
- *
- * Updated to support the VIA 8233/8235 audio subsystem
- * Alan Cox <alan@redhat.com> (C) Copyright 2002, 2003 Red Hat Inc
- *
- * Distributed under the GNU GENERAL PUBLIC LICENSE (GPL) Version 2.
- * See the "COPYING" file distributed with this software for more info.
- * NO WARRANTY
- *
- * For a list of known bugs (errata) and documentation,
- * see via-audio.pdf in Documentation/DocBook.
- * If this documentation does not exist, run "make pdfdocs".
- */
-
-
-#define VIA_VERSION	"1.9.1-ac4-2.5"
-
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <linux/poison.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/spinlock.h>
-#include <linux/sound.h>
-#include <linux/poll.h>
-#include <linux/soundcard.h>
-#include <linux/ac97_codec.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <linux/mutex.h>
-
-#include "sound_config.h"
-#include "dev_table.h"
-#include "mpu401.h"
-
-
-#undef VIA_DEBUG	/* define to enable debugging output and checks */
-#ifdef VIA_DEBUG
-/* note: prints function name for you */
-#define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args)
-#else
-#define DPRINTK(fmt, args...)
-#endif
-
-#undef VIA_NDEBUG	/* define to disable lightweight runtime checks */
-#ifdef VIA_NDEBUG
-#define assert(expr)
-#else
-#define assert(expr) \
-        if(!(expr)) {					\
-        printk( "Assertion failed! %s,%s,%s,line=%d\n",	\
-        #expr,__FILE__,__FUNCTION__,__LINE__);		\
-        }
-#endif
-
-#define VIA_SUPPORT_MMAP 1 /* buggy, for now... */
-
-#define MAX_CARDS	1
-
-#define VIA_CARD_NAME	"VIA 82Cxxx Audio driver " VIA_VERSION
-#define VIA_MODULE_NAME "via82cxxx"
-#define PFX		VIA_MODULE_NAME ": "
-
-#define VIA_COUNTER_LIMIT	100000
-
-/* size of DMA buffers */
-#define VIA_MAX_BUFFER_DMA_PAGES	32
-
-/* buffering default values in ms */
-#define VIA_DEFAULT_FRAG_TIME		20
-#define VIA_DEFAULT_BUFFER_TIME		500
-
-/* the hardware has a 256 fragment limit */
-#define VIA_MIN_FRAG_NUMBER		2
-#define VIA_MAX_FRAG_NUMBER		128
-
-#define VIA_MAX_FRAG_SIZE		PAGE_SIZE
-#define VIA_MIN_FRAG_SIZE		(VIA_MAX_BUFFER_DMA_PAGES * PAGE_SIZE / VIA_MAX_FRAG_NUMBER)
-
-
-/* 82C686 function 5 (audio codec) PCI configuration registers */
-#define VIA_ACLINK_STATUS	0x40
-#define VIA_ACLINK_CTRL		0x41
-#define VIA_FUNC_ENABLE		0x42
-#define VIA_PNP_CONTROL		0x43
-#define VIA_FM_NMI_CTRL		0x48
-
-/*
- * controller base 0 (scatter-gather) registers
- *
- * NOTE: Via datasheet lists first channel as "read"
- * channel and second channel as "write" channel.
- * I changed the naming of the constants to be more
- * clear than I felt the datasheet to be.
- */
-
-#define VIA_BASE0_PCM_OUT_CHAN	0x00 /* output PCM to user */
-#define VIA_BASE0_PCM_OUT_CHAN_STATUS 0x00
-#define VIA_BASE0_PCM_OUT_CHAN_CTRL	0x01
-#define VIA_BASE0_PCM_OUT_CHAN_TYPE	0x02
-
-#define VIA_BASE0_PCM_IN_CHAN		0x10 /* input PCM from user */
-#define VIA_BASE0_PCM_IN_CHAN_STATUS	0x10
-#define VIA_BASE0_PCM_IN_CHAN_CTRL	0x11
-#define VIA_BASE0_PCM_IN_CHAN_TYPE	0x12
-
-/* offsets from base */
-#define VIA_PCM_STATUS			0x00
-#define VIA_PCM_CONTROL			0x01
-#define VIA_PCM_TYPE			0x02
-#define VIA_PCM_LEFTVOL			0x02
-#define VIA_PCM_RIGHTVOL		0x03
-#define VIA_PCM_TABLE_ADDR		0x04
-#define VIA_PCM_STOPRATE		0x08	/* 8233+ */
-#define VIA_PCM_BLOCK_COUNT		0x0C
-
-/* XXX unused DMA channel for FM PCM data */
-#define VIA_BASE0_FM_OUT_CHAN		0x20
-#define VIA_BASE0_FM_OUT_CHAN_STATUS	0x20
-#define VIA_BASE0_FM_OUT_CHAN_CTRL	0x21
-#define VIA_BASE0_FM_OUT_CHAN_TYPE	0x22
-
-/* Six channel audio output on 8233 */
-#define VIA_BASE0_MULTI_OUT_CHAN		0x40
-#define VIA_BASE0_MULTI_OUT_CHAN_STATUS		0x40
-#define VIA_BASE0_MULTI_OUT_CHAN_CTRL		0x41
-#define VIA_BASE0_MULTI_OUT_CHAN_TYPE		0x42
-
-#define VIA_BASE0_AC97_CTRL		0x80
-#define VIA_BASE0_SGD_STATUS_SHADOW	0x84
-#define VIA_BASE0_GPI_INT_ENABLE	0x8C
-#define VIA_INTR_OUT			((1<<0) |  (1<<4) |  (1<<8))
-#define VIA_INTR_IN			((1<<1) |  (1<<5) |  (1<<9))
-#define VIA_INTR_FM			((1<<2) |  (1<<6) | (1<<10))
-#define VIA_INTR_MASK		(VIA_INTR_OUT | VIA_INTR_IN | VIA_INTR_FM)
-
-/* Newer VIA we need to monitor the low 3 bits of each channel. This
-   mask covers the channels we don't yet use as well 
- */
- 
-#define VIA_NEW_INTR_MASK		0x77077777UL
-
-/* VIA_BASE0_AUDIO_xxx_CHAN_TYPE bits */
-#define VIA_IRQ_ON_FLAG			(1<<0)	/* int on each flagged scatter block */
-#define VIA_IRQ_ON_EOL			(1<<1)	/* int at end of scatter list */
-#define VIA_INT_SEL_PCI_LAST_LINE_READ	(0)	/* int at PCI read of last line */
-#define VIA_INT_SEL_LAST_SAMPLE_SENT	(1<<2)	/* int at last sample sent */
-#define VIA_INT_SEL_ONE_LINE_LEFT	(1<<3)	/* int at less than one line to send */
-#define VIA_PCM_FMT_STEREO		(1<<4)	/* PCM stereo format (bit clear == mono) */
-#define VIA_PCM_FMT_16BIT		(1<<5)	/* PCM 16-bit format (bit clear == 8-bit) */
-#define VIA_PCM_REC_FIFO		(1<<6)	/* PCM Recording FIFO */
-#define VIA_RESTART_SGD_ON_EOL		(1<<7)	/* restart scatter-gather at EOL */
-#define VIA_PCM_FMT_MASK		(VIA_PCM_FMT_STEREO|VIA_PCM_FMT_16BIT)
-#define VIA_CHAN_TYPE_MASK		(VIA_RESTART_SGD_ON_EOL | \
-					 VIA_IRQ_ON_FLAG | \
-					 VIA_IRQ_ON_EOL)
-#define VIA_CHAN_TYPE_INT_SELECT	(VIA_INT_SEL_LAST_SAMPLE_SENT)
-
-/* PCI configuration register bits and masks */
-#define VIA_CR40_AC97_READY	0x01
-#define VIA_CR40_AC97_LOW_POWER	0x02
-#define VIA_CR40_SECONDARY_READY 0x04
-
-#define VIA_CR41_AC97_ENABLE	0x80 /* enable AC97 codec */
-#define VIA_CR41_AC97_RESET	0x40 /* clear bit to reset AC97 */
-#define VIA_CR41_AC97_WAKEUP	0x20 /* wake up from power-down mode */
-#define VIA_CR41_AC97_SDO	0x10 /* force Serial Data Out (SDO) high */
-#define VIA_CR41_VRA		0x08 /* enable variable sample rate */
-#define VIA_CR41_PCM_ENABLE	0x04 /* AC Link SGD Read Channel PCM Data Output */
-#define VIA_CR41_FM_PCM_ENABLE	0x02 /* AC Link FM Channel PCM Data Out */
-#define VIA_CR41_SB_PCM_ENABLE	0x01 /* AC Link SB PCM Data Output */
-#define VIA_CR41_BOOT_MASK	(VIA_CR41_AC97_ENABLE | \
-				 VIA_CR41_AC97_WAKEUP | \
-				 VIA_CR41_AC97_SDO)
-#define VIA_CR41_RUN_MASK	(VIA_CR41_AC97_ENABLE | \
-				 VIA_CR41_AC97_RESET | \
-				 VIA_CR41_VRA | \
-				 VIA_CR41_PCM_ENABLE)
-
-#define VIA_CR42_SB_ENABLE	0x01
-#define VIA_CR42_MIDI_ENABLE	0x02
-#define VIA_CR42_FM_ENABLE	0x04
-#define VIA_CR42_GAME_ENABLE	0x08
-#define VIA_CR42_MIDI_IRQMASK   0x40
-#define VIA_CR42_MIDI_PNP	0x80
-
-#define VIA_CR44_SECOND_CODEC_SUPPORT	(1 << 6)
-#define VIA_CR44_AC_LINK_ACCESS		(1 << 7)
-
-#define VIA_CR48_FM_TRAP_TO_NMI		(1 << 2)
-
-/* controller base 0 register bitmasks */
-#define VIA_INT_DISABLE_MASK		(~(0x01|0x02))
-#define VIA_SGD_STOPPED			(1 << 2)
-#define VIA_SGD_PAUSED			(1 << 6)
-#define VIA_SGD_ACTIVE			(1 << 7)
-#define VIA_SGD_TERMINATE		(1 << 6)
-#define VIA_SGD_FLAG			(1 << 0)
-#define VIA_SGD_EOL			(1 << 1)
-#define VIA_SGD_START			(1 << 7)
-
-#define VIA_CR80_FIRST_CODEC		0
-#define VIA_CR80_SECOND_CODEC		(1 << 30)
-#define VIA_CR80_FIRST_CODEC_VALID	(1 << 25)
-#define VIA_CR80_VALID			(1 << 25)
-#define VIA_CR80_SECOND_CODEC_VALID	(1 << 27)
-#define VIA_CR80_BUSY			(1 << 24)
-#define VIA_CR83_BUSY			(1)
-#define VIA_CR83_FIRST_CODEC_VALID	(1 << 1)
-#define VIA_CR80_READ			(1 << 23)
-#define VIA_CR80_WRITE_MODE		0
-#define VIA_CR80_REG_IDX(idx)		((((idx) & 0xFF) >> 1) << 16)
-
-/* capabilities we announce */
-#ifdef VIA_SUPPORT_MMAP
-#define VIA_DSP_CAP (DSP_CAP_REVISION | DSP_CAP_DUPLEX | DSP_CAP_MMAP | \
-		     DSP_CAP_TRIGGER | DSP_CAP_REALTIME)
-#else
-#define VIA_DSP_CAP (DSP_CAP_REVISION | DSP_CAP_DUPLEX | \
-		     DSP_CAP_TRIGGER | DSP_CAP_REALTIME)
-#endif
-
-/* scatter-gather DMA table entry, exactly as passed to hardware */
-struct via_sgd_table {
-	u32 addr;
-	u32 count;	/* includes additional VIA_xxx bits also */
-};
-
-#define VIA_EOL (1 << 31)
-#define VIA_FLAG (1 << 30)
-#define VIA_STOP (1 << 29)
-
-
-enum via_channel_states {
-	sgd_stopped = 0,
-	sgd_in_progress = 1,
-};
-
-
-struct via_buffer_pgtbl {
-	dma_addr_t handle;
-	void *cpuaddr;
-};
-
-
-struct via_channel {
-	atomic_t n_frags;
-	atomic_t hw_ptr;
-	wait_queue_head_t wait;
-
-	unsigned int sw_ptr;
-	unsigned int slop_len;
-	unsigned int n_irqs;
-	int bytes;
-
-	unsigned is_active : 1;
-	unsigned is_record : 1;
-	unsigned is_mapped : 1;
-	unsigned is_enabled : 1;
-	unsigned is_multi: 1;	/* 8233 6 channel */
-	u8 pcm_fmt;		/* VIA_PCM_FMT_xxx */
-	u8 channels;		/* Channel count */
-
-	unsigned rate;		/* sample rate */
-	unsigned int frag_size;
-	unsigned int frag_number;
-	
-	unsigned char intmask;
-
-	volatile struct via_sgd_table *sgtable;
-	dma_addr_t sgt_handle;
-
-	unsigned int page_number;
-	struct via_buffer_pgtbl pgtbl[VIA_MAX_BUFFER_DMA_PAGES];
-
-	long iobase;
-
-	const char *name;
-};
-
-
-/* data stored for each chip */
-struct via_info {
-	struct pci_dev *pdev;
-	long baseaddr;
-
-	struct ac97_codec *ac97;
-	spinlock_t ac97_lock;
-	spinlock_t lock;
-	int card_num;		/* unique card number, from 0 */
-
-	int dev_dsp;		/* /dev/dsp index from register_sound_dsp() */
-
-	unsigned rev_h : 1;
-	unsigned legacy: 1;	/* Has legacy ports */
-	unsigned intmask: 1;	/* Needs int bits */
-	unsigned sixchannel: 1;	/* 8233/35 with 6 channel support */
-	unsigned volume: 1;
-
-	unsigned locked_rate : 1;
-	
-	int mixer_vol;		/* 8233/35 volume  - not yet implemented */
-
-	struct mutex syscall_mutex;
-	struct mutex open_mutex;
-
-	/* The 8233/8235 have 4 DX audio channels, two record and
-	   one six channel out. We bind ch_in to DX 1, ch_out to multichannel
-	   and ch_fm to DX 2. DX 3 and REC0/REC1 are unused at the
-	   moment */
-	   
-	struct via_channel ch_in;
-	struct via_channel ch_out;
-	struct via_channel ch_fm;
-
-#ifdef CONFIG_MIDI_VIA82CXXX
-        void *midi_devc;
-        struct address_info midi_info;
-#endif
-};
-
-
-/* number of cards, used for assigning unique numbers to cards */
-static unsigned via_num_cards;
-
-
-
-/****************************************************************
- *
- * prototypes
- *
- *
- */
-
-static int via_init_one (struct pci_dev *dev, const struct pci_device_id *id);
-static void __devexit via_remove_one (struct pci_dev *pdev);
-
-static ssize_t via_dsp_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos);
-static ssize_t via_dsp_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos);
-static unsigned int via_dsp_poll(struct file *file, struct poll_table_struct *wait);
-static int via_dsp_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
-static int via_dsp_open (struct inode *inode, struct file *file);
-static int via_dsp_release(struct inode *inode, struct file *file);
-static int via_dsp_mmap(struct file *file, struct vm_area_struct *vma);
-
-static u16 via_ac97_read_reg (struct ac97_codec *codec, u8 reg);
-static void via_ac97_write_reg (struct ac97_codec *codec, u8 reg, u16 value);
-static u8 via_ac97_wait_idle (struct via_info *card);
-
-static void via_chan_free (struct via_info *card, struct via_channel *chan);
-static void via_chan_clear (struct via_info *card, struct via_channel *chan);
-static void via_chan_pcm_fmt (struct via_channel *chan, int reset);
-static void via_chan_buffer_free (struct via_info *card, struct via_channel *chan);
-
-
-/****************************************************************
- *
- * Various data the driver needs
- *
- *
- */
-
-
-static struct pci_device_id via_pci_tbl[] = {
-	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8233_5,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci,via_pci_tbl);
-
-
-static struct pci_driver via_driver = {
-	.name		= VIA_MODULE_NAME,
-	.id_table	= via_pci_tbl,
-	.probe		= via_init_one,
-	.remove		= __devexit_p(via_remove_one),
-};
-
-
-/****************************************************************
- *
- * Low-level base 0 register read/write helpers
- *
- *
- */
-
-/**
- *	via_chan_stop - Terminate DMA on specified PCM channel
- *	@iobase: PCI base address for SGD channel registers
- *
- *	Terminate scatter-gather DMA operation for given
- *	channel (derived from @iobase), if DMA is active.
- *
- *	Note that @iobase is not the PCI base address,
- *	but the PCI base address plus an offset to
- *	one of three PCM channels supported by the chip.
- *
- */
-
-static inline void via_chan_stop (long iobase)
-{
-	if (inb (iobase + VIA_PCM_STATUS) & VIA_SGD_ACTIVE)
-		outb (VIA_SGD_TERMINATE, iobase + VIA_PCM_CONTROL);
-}
-
-
-/**
- *	via_chan_status_clear - Clear status flags on specified DMA channel
- *	@iobase: PCI base address for SGD channel registers
- *
- *	Clear any pending status flags for the given
- *	DMA channel (derived from @iobase), if any
- *	flags are asserted.
- *
- *	Note that @iobase is not the PCI base address,
- *	but the PCI base address plus an offset to
- *	one of three PCM channels supported by the chip.
- *
- */
-
-static inline void via_chan_status_clear (long iobase)
-{
-	u8 tmp = inb (iobase + VIA_PCM_STATUS);
-
-	if (tmp != 0)
-		outb (tmp, iobase + VIA_PCM_STATUS);
-}
-
-
-/**
- *	sg_begin - Begin recording or playback on a PCM channel
- *	@chan: Channel for which DMA operation shall begin
- *
- *	Start scatter-gather DMA for the given channel.
- *
- */
-
-static inline void sg_begin (struct via_channel *chan)
-{
-	DPRINTK("Start with intmask %d\n", chan->intmask);
-	DPRINTK("About to start from %d to %d\n", 
-		inl(chan->iobase + VIA_PCM_BLOCK_COUNT),
-		inb(chan->iobase + VIA_PCM_STOPRATE + 3));
-	outb (VIA_SGD_START|chan->intmask, chan->iobase + VIA_PCM_CONTROL);
-	DPRINTK("Status is now %02X\n", inb(chan->iobase + VIA_PCM_STATUS));
-	DPRINTK("Control is now %02X\n", inb(chan->iobase + VIA_PCM_CONTROL));
-}
-
-
-static int sg_active (long iobase)
-{
-	u8 tmp = inb (iobase + VIA_PCM_STATUS);
-	if ((tmp & VIA_SGD_STOPPED) || (tmp & VIA_SGD_PAUSED)) {
-		printk(KERN_WARNING "via82cxxx warning: SG stopped or paused\n");
-		return 0;
-	}
-	if (tmp & VIA_SGD_ACTIVE)
-		return 1;
-	return 0;
-}
-
-static int via_sg_offset(struct via_channel *chan)
-{
-	return inl (chan->iobase + VIA_PCM_BLOCK_COUNT) & 0x00FFFFFF;
-}
-
-/****************************************************************
- *
- * Miscellaneous debris
- *
- *
- */
-
-
-/**
- *	via_syscall_down - down the card-specific syscell semaphore
- *	@card: Private info for specified board
- *	@nonblock: boolean, non-zero if O_NONBLOCK is set
- *
- *	Encapsulates standard method of acquiring the syscall sem.
- *
- *	Returns negative errno on error, or zero for success.
- */
-
-static inline int via_syscall_down (struct via_info *card, int nonblock)
-{
-	/* Thomas Sailer:
-	 * EAGAIN is supposed to be used if IO is pending,
-	 * not if there is contention on some internal
-	 * synchronization primitive which should be
-	 * held only for a short time anyway
-	 */
-	nonblock = 0;
-
-	if (nonblock) {
-		if (!mutex_trylock(&card->syscall_mutex))
-			return -EAGAIN;
-	} else {
-		if (mutex_lock_interruptible(&card->syscall_mutex))
-			return -ERESTARTSYS;
-	}
-
-	return 0;
-}
-
-
-/**
- *	via_stop_everything - Stop all audio operations
- *	@card: Private info for specified board
- *
- *	Stops all DMA operations and interrupts, and clear
- *	any pending status bits resulting from those operations.
- */
-
-static void via_stop_everything (struct via_info *card)
-{
-	u8 tmp, new_tmp;
-
-	DPRINTK ("ENTER\n");
-
-	assert (card != NULL);
-
-	/*
-	 * terminate any existing operations on audio read/write channels
-	 */
-	via_chan_stop (card->baseaddr + VIA_BASE0_PCM_OUT_CHAN);
-	via_chan_stop (card->baseaddr + VIA_BASE0_PCM_IN_CHAN);
-	via_chan_stop (card->baseaddr + VIA_BASE0_FM_OUT_CHAN);
-	if(card->sixchannel)
-		via_chan_stop (card->baseaddr + VIA_BASE0_MULTI_OUT_CHAN);
-
-	/*
-	 * clear any existing stops / flags (sanity check mainly)
-	 */
-	via_chan_status_clear (card->baseaddr + VIA_BASE0_PCM_OUT_CHAN);
-	via_chan_status_clear (card->baseaddr + VIA_BASE0_PCM_IN_CHAN);
-	via_chan_status_clear (card->baseaddr + VIA_BASE0_FM_OUT_CHAN);
-	if(card->sixchannel)
-		via_chan_status_clear (card->baseaddr + VIA_BASE0_MULTI_OUT_CHAN);
-
-	/*
-	 * clear any enabled interrupt bits
-	 */
-	tmp = inb (card->baseaddr + VIA_BASE0_PCM_OUT_CHAN_TYPE);
-	new_tmp = tmp & ~(VIA_IRQ_ON_FLAG|VIA_IRQ_ON_EOL|VIA_RESTART_SGD_ON_EOL);
-	if (tmp != new_tmp)
-		outb (0, card->baseaddr + VIA_BASE0_PCM_OUT_CHAN_TYPE);
-
-	tmp = inb (card->baseaddr + VIA_BASE0_PCM_IN_CHAN_TYPE);
-	new_tmp = tmp & ~(VIA_IRQ_ON_FLAG|VIA_IRQ_ON_EOL|VIA_RESTART_SGD_ON_EOL);
-	if (tmp != new_tmp)
-		outb (0, card->baseaddr + VIA_BASE0_PCM_IN_CHAN_TYPE);
-
-	tmp = inb (card->baseaddr + VIA_BASE0_FM_OUT_CHAN_TYPE);
-	new_tmp = tmp & ~(VIA_IRQ_ON_FLAG|VIA_IRQ_ON_EOL|VIA_RESTART_SGD_ON_EOL);
-	if (tmp != new_tmp)
-		outb (0, card->baseaddr + VIA_BASE0_FM_OUT_CHAN_TYPE);
-
-	if(card->sixchannel)
-	{
-		tmp = inb (card->baseaddr + VIA_BASE0_MULTI_OUT_CHAN_TYPE);
-		new_tmp = tmp & ~(VIA_IRQ_ON_FLAG|VIA_IRQ_ON_EOL|VIA_RESTART_SGD_ON_EOL);
-		if (tmp != new_tmp)
-			outb (0, card->baseaddr + VIA_BASE0_MULTI_OUT_CHAN_TYPE);
-	}
-
-	udelay(10);
-
-	/*
-	 * clear any existing flags
-	 */
-	via_chan_status_clear (card->baseaddr + VIA_BASE0_PCM_OUT_CHAN);
-	via_chan_status_clear (card->baseaddr + VIA_BASE0_PCM_IN_CHAN);
-	via_chan_status_clear (card->baseaddr + VIA_BASE0_FM_OUT_CHAN);
-
-	DPRINTK ("EXIT\n");
-}
-
-
-/**
- *	via_set_rate - Set PCM rate for given channel
- *	@ac97: Pointer to generic codec info struct
- *	@chan: Private info for specified channel
- *	@rate: Desired PCM sample rate, in Khz
- *
- *	Sets the PCM sample rate for a channel.
- *
- *	Values for @rate are clamped to a range of 4000 Khz through 48000 Khz,
- *	due to hardware constraints.
- */
-
-static int via_set_rate (struct ac97_codec *ac97,
-			 struct via_channel *chan, unsigned rate)
-{
-	struct via_info *card = ac97->private_data;
-	int rate_reg;
-	u32 dacp;
-	u32 mast_vol, phone_vol, mono_vol, pcm_vol;
-	u32 mute_vol = 0x8000;	/* The mute volume? -- Seems to work! */
-
-	DPRINTK ("ENTER, rate = %d\n", rate);
-
-	if (chan->rate == rate)
-		goto out;
-	if (card->locked_rate) {
-		chan->rate = 48000;
-		goto out;
-	}
-
-	if (rate > 48000)		rate = 48000;
-	if (rate < 4000) 		rate = 4000;
-
-	rate_reg = chan->is_record ? AC97_PCM_LR_ADC_RATE :
-			    AC97_PCM_FRONT_DAC_RATE;
-
-	/* Save current state */
-	dacp=via_ac97_read_reg(ac97, AC97_POWER_CONTROL);
-	mast_vol = via_ac97_read_reg(ac97, AC97_MASTER_VOL_STEREO);
-	mono_vol = via_ac97_read_reg(ac97, AC97_MASTER_VOL_MONO);
-	phone_vol = via_ac97_read_reg(ac97, AC97_HEADPHONE_VOL);
-	pcm_vol = via_ac97_read_reg(ac97, AC97_PCMOUT_VOL);
-	/* Mute - largely reduces popping */
-	via_ac97_write_reg(ac97, AC97_MASTER_VOL_STEREO, mute_vol);
-	via_ac97_write_reg(ac97, AC97_MASTER_VOL_MONO, mute_vol);
-	via_ac97_write_reg(ac97, AC97_HEADPHONE_VOL, mute_vol);
-       	via_ac97_write_reg(ac97, AC97_PCMOUT_VOL, mute_vol);
-	/* Power down the DAC */
-	via_ac97_write_reg(ac97, AC97_POWER_CONTROL, dacp|0x0200);
-
-        /* Set new rate */
-	via_ac97_write_reg (ac97, rate_reg, rate);
-
-	/* Power DAC back up */
-	via_ac97_write_reg(ac97, AC97_POWER_CONTROL, dacp);
-	udelay (200); /* reduces popping */
-
-	/* Restore volumes */
-	via_ac97_write_reg(ac97, AC97_MASTER_VOL_STEREO, mast_vol);
-	via_ac97_write_reg(ac97, AC97_MASTER_VOL_MONO, mono_vol);
-	via_ac97_write_reg(ac97, AC97_HEADPHONE_VOL, phone_vol);
-	via_ac97_write_reg(ac97, AC97_PCMOUT_VOL, pcm_vol);
-
-	/* the hardware might return a value different than what we
-	 * passed to it, so read the rate value back from hardware
-	 * to see what we came up with
-	 */
-	chan->rate = via_ac97_read_reg (ac97, rate_reg);
-
-	if (chan->rate == 0) {
-		card->locked_rate = 1;
-		chan->rate = 48000;
-		printk (KERN_WARNING PFX "Codec rate locked at 48Khz\n");
-	}
-
-out:
-	DPRINTK ("EXIT, returning rate %d Hz\n", chan->rate);
-	return chan->rate;
-}
-
-
-/****************************************************************
- *
- * Channel-specific operations
- *
- *
- */
-
-
-/**
- *	via_chan_init_defaults - Initialize a struct via_channel
- *	@card: Private audio chip info
- *	@chan: Channel to be initialized
- *
- *	Zero @chan, and then set all static defaults for the structure.
- */
-
-static void via_chan_init_defaults (struct via_info *card, struct via_channel *chan)
-{
-	memset (chan, 0, sizeof (*chan));
-
-	if(card->intmask)
-		chan->intmask = 0x23;	/* Turn on the IRQ bits */
-		
-	if (chan == &card->ch_out) {
-		chan->name = "PCM-OUT";
-		if(card->sixchannel)
-		{
-			chan->iobase = card->baseaddr + VIA_BASE0_MULTI_OUT_CHAN;
-			chan->is_multi = 1;
-			DPRINTK("Using multichannel for pcm out\n");
-		}
-		else
-			chan->iobase = card->baseaddr + VIA_BASE0_PCM_OUT_CHAN;
-	} else if (chan == &card->ch_in) {
-		chan->name = "PCM-IN";
-		chan->iobase = card->baseaddr + VIA_BASE0_PCM_IN_CHAN;
-		chan->is_record = 1;
-	} else if (chan == &card->ch_fm) {
-		chan->name = "PCM-OUT-FM";
-		chan->iobase = card->baseaddr + VIA_BASE0_FM_OUT_CHAN;
-	} else {
-		BUG();
-	}
-
-	init_waitqueue_head (&chan->wait);
-
-	chan->pcm_fmt = VIA_PCM_FMT_MASK;
-	chan->is_enabled = 1;
-
-	chan->frag_number = 0;
-        chan->frag_size = 0;
-	atomic_set(&chan->n_frags, 0);
-	atomic_set (&chan->hw_ptr, 0);
-}
-
-/**
- *      via_chan_init - Initialize PCM channel
- *      @card: Private audio chip info
- *      @chan: Channel to be initialized
- *
- *      Performs some of the preparations necessary to begin
- *      using a PCM channel.
- *
- *      Currently the preparations consist of
- *      setting the PCM channel to a known state.
- */
-
-
-static void via_chan_init (struct via_info *card, struct via_channel *chan)
-{
-
-        DPRINTK ("ENTER\n");
-
-	/* bzero channel structure, and init members to defaults */
-        via_chan_init_defaults (card, chan);
-
-        /* stop any existing channel output */
-        via_chan_clear (card, chan);
-        via_chan_status_clear (chan->iobase);
-        via_chan_pcm_fmt (chan, 1);
-
-	DPRINTK ("EXIT\n");
-}
-
-/**
- *	via_chan_buffer_init - Initialize PCM channel buffer
- *	@card: Private audio chip info
- *	@chan: Channel to be initialized
- *
- *	Performs some of the preparations necessary to begin
- *	using a PCM channel.
- *
- *	Currently the preparations include allocating the
- *	scatter-gather DMA table and buffers,
- *	and passing the
- *	address of the DMA table to the hardware.
- *
- *	Note that special care is taken when passing the
- *	DMA table address to hardware, because it was found
- *	during driver development that the hardware did not
- *	always "take" the address.
- */
-
-static int via_chan_buffer_init (struct via_info *card, struct via_channel *chan)
-{
-	int page, offset;
-	int i;
-
-	DPRINTK ("ENTER\n");
-
-
-	chan->intmask = 0;
-	if(card->intmask)
-		chan->intmask = 0x23;	/* Turn on the IRQ bits */
-		
-	if (chan->sgtable != NULL) {
-		DPRINTK ("EXIT\n");
-		return 0;
-	}
-
-	/* alloc DMA-able memory for scatter-gather table */
-	chan->sgtable = pci_alloc_consistent (card->pdev,
-		(sizeof (struct via_sgd_table) * chan->frag_number),
-		&chan->sgt_handle);
-	if (!chan->sgtable) {
-		printk (KERN_ERR PFX "DMA table alloc fail, aborting\n");
-		DPRINTK ("EXIT\n");
-		return -ENOMEM;
-	}
-
-	memset ((void*)chan->sgtable, 0,
-		(sizeof (struct via_sgd_table) * chan->frag_number));
-
-	/* alloc DMA-able memory for scatter-gather buffers */
-
-	chan->page_number = (chan->frag_number * chan->frag_size) / PAGE_SIZE +
-			    (((chan->frag_number * chan->frag_size) % PAGE_SIZE) ? 1 : 0);
-
-	for (i = 0; i < chan->page_number; i++) {
-		chan->pgtbl[i].cpuaddr = pci_alloc_consistent (card->pdev, PAGE_SIZE,
-					      &chan->pgtbl[i].handle);
-
-		if (!chan->pgtbl[i].cpuaddr) {
-			chan->page_number = i;
-			goto err_out_nomem;
-		}
-
-#ifndef VIA_NDEBUG
-                memset (chan->pgtbl[i].cpuaddr, 0xBC, chan->frag_size);
-#endif
-
-#if 1
-                DPRINTK ("dmabuf_pg #%d (h=%lx, v2p=%lx, a=%p)\n",
-			i, (long)chan->pgtbl[i].handle,
-			virt_to_phys(chan->pgtbl[i].cpuaddr),
-			chan->pgtbl[i].cpuaddr);
-#endif
-	}
-
-	for (i = 0; i < chan->frag_number; i++) {
-
-		page = i / (PAGE_SIZE / chan->frag_size);
-		offset = (i % (PAGE_SIZE / chan->frag_size)) * chan->frag_size;
-
-		chan->sgtable[i].count = cpu_to_le32 (chan->frag_size | VIA_FLAG);
-		chan->sgtable[i].addr = cpu_to_le32 (chan->pgtbl[page].handle + offset);
-
-#if 1
-		DPRINTK ("dmabuf #%d (32(h)=%lx)\n",
-			 i,
-			 (long)chan->sgtable[i].addr);
-#endif
-	}
-
-	/* overwrite the last buffer information */
-	chan->sgtable[chan->frag_number - 1].count = cpu_to_le32 (chan->frag_size | VIA_EOL);
-
-	/* set location of DMA-able scatter-gather info table */
-	DPRINTK ("outl (0x%X, 0x%04lX)\n",
-		chan->sgt_handle, chan->iobase + VIA_PCM_TABLE_ADDR);
-
-	via_ac97_wait_idle (card);
-	outl (chan->sgt_handle, chan->iobase + VIA_PCM_TABLE_ADDR);
-	udelay (20);
-	via_ac97_wait_idle (card);
-	/* load no rate adaption, stereo 16bit, set up ring slots */
-	if(card->sixchannel)
-	{
-		if(!chan->is_multi)
-		{
-			outl (0xFFFFF | (0x3 << 20) | (chan->frag_number << 24), chan->iobase + VIA_PCM_STOPRATE);
-			udelay (20);
-			via_ac97_wait_idle (card);
-		}
-	}
-
-	DPRINTK ("inl (0x%lX) = %x\n",
-		chan->iobase + VIA_PCM_TABLE_ADDR,
-		inl(chan->iobase + VIA_PCM_TABLE_ADDR));
-
-	DPRINTK ("EXIT\n");
-	return 0;
-
-err_out_nomem:
-	printk (KERN_ERR PFX "DMA buffer alloc fail, aborting\n");
-	via_chan_buffer_free (card, chan);
-	DPRINTK ("EXIT\n");
-	return -ENOMEM;
-}
-
-
-/**
- *	via_chan_free - Release a PCM channel
- *	@card: Private audio chip info
- *	@chan: Channel to be released
- *
- *	Performs all the functions necessary to clean up
- *	an initialized channel.
- *
- *	Currently these functions include disabled any
- *	active DMA operations, setting the PCM channel
- *	back to a known state, and releasing any allocated
- *	sound buffers.
- */
-
-static void via_chan_free (struct via_info *card, struct via_channel *chan)
-{
-	DPRINTK ("ENTER\n");
-
-	spin_lock_irq (&card->lock);
-
-	/* stop any existing channel output */
-	via_chan_status_clear (chan->iobase);
-	via_chan_stop (chan->iobase);
-	via_chan_status_clear (chan->iobase);
-
-	spin_unlock_irq (&card->lock);
-
-	synchronize_irq(card->pdev->irq);
-
-	DPRINTK ("EXIT\n");
-}
-
-static void via_chan_buffer_free (struct via_info *card, struct via_channel *chan)
-{
-	int i;
-
-        DPRINTK ("ENTER\n");
-
-	/* zero location of DMA-able scatter-gather info table */
-	via_ac97_wait_idle(card);
-	outl (0, chan->iobase + VIA_PCM_TABLE_ADDR);
-
-	for (i = 0; i < chan->page_number; i++)
-		if (chan->pgtbl[i].cpuaddr) {
-			pci_free_consistent (card->pdev, PAGE_SIZE,
-					     chan->pgtbl[i].cpuaddr,
-					     chan->pgtbl[i].handle);
-			chan->pgtbl[i].cpuaddr = NULL;
-			chan->pgtbl[i].handle = 0;
-		}
-
-	chan->page_number = 0;
-
-	if (chan->sgtable) {
-		pci_free_consistent (card->pdev,
-			(sizeof (struct via_sgd_table) * chan->frag_number),
-			(void*)chan->sgtable, chan->sgt_handle);
-		chan->sgtable = NULL;
-	}
-
-	DPRINTK ("EXIT\n");
-}
-
-
-/**
- *	via_chan_pcm_fmt - Update PCM channel settings
- *	@chan: Channel to be updated
- *	@reset: Boolean.  If non-zero, channel will be reset
- *		to 8-bit mono mode.
- *
- *	Stores the settings of the current PCM format,
- *	8-bit or 16-bit, and mono/stereo, into the
- *	hardware settings for the specified channel.
- *	If @reset is non-zero, the channel is reset
- *	to 8-bit mono mode.  Otherwise, the channel
- *	is set to the values stored in the channel
- *	information struct @chan.
- */
-
-static void via_chan_pcm_fmt (struct via_channel *chan, int reset)
-{
-	DPRINTK ("ENTER, pcm_fmt=0x%02X, reset=%s\n",
-		 chan->pcm_fmt, reset ? "yes" : "no");
-
-	assert (chan != NULL);
-
-	if (reset)
-	{
-		/* reset to 8-bit mono mode */
-		chan->pcm_fmt = 0;
-		chan->channels = 1;
-	}
-
-	/* enable interrupts on FLAG and EOL */
-	chan->pcm_fmt |= VIA_CHAN_TYPE_MASK;
-
-	/* if we are recording, enable recording fifo bit */
-	if (chan->is_record)
-		chan->pcm_fmt |= VIA_PCM_REC_FIFO;
-	/* set interrupt select bits where applicable (PCM in & out channels) */
-	if (!chan->is_record)
-		chan->pcm_fmt |= VIA_CHAN_TYPE_INT_SELECT;
-	
-	DPRINTK("SET FMT - %02x %02x\n", chan->intmask , chan->is_multi);
-	
-	if(chan->intmask)
-	{
-		u32 m;
-
-		/*
-		 *	Channel 0x4 is up to 6 x 16bit and has to be
-		 *	programmed differently 
-		 */
-		 		
-		if(chan->is_multi)
-		{
-			u8 c = 0;
-			
-			/*
-			 *	Load the type bit for num channels
-			 *	and 8/16bit
-			 */
-			 
-			if(chan->pcm_fmt & VIA_PCM_FMT_16BIT)
-				c = 1 << 7;
-			if(chan->pcm_fmt & VIA_PCM_FMT_STEREO)
-				c |= (2<<4);
-			else
-				c |= (1<<4);
-				
-			outb(c, chan->iobase + VIA_PCM_TYPE);
-			
-			/*
-			 *	Set the channel steering
-			 *	Mono
-			 *		Channel 0 to slot 3
-			 *		Channel 0 to slot 4
-			 *	Stereo
-			 *		Channel 0 to slot 3
-			 *		Channel 1 to slot 4
-			 */
-			 
-			switch(chan->channels)
-			{
-				case 1:
-					outl(0xFF000000 | (1<<0) | (1<<4) , chan->iobase + VIA_PCM_STOPRATE);
-					break;
-				case 2:
-					outl(0xFF000000 | (1<<0) | (2<<4) , chan->iobase + VIA_PCM_STOPRATE);
-					break;
-				case 4:
-					outl(0xFF000000 | (1<<0) | (2<<4) | (3<<8) | (4<<12), chan->iobase + VIA_PCM_STOPRATE);
-					break;
-				case 6:
-					outl(0xFF000000 | (1<<0) | (2<<4) | (5<<8) | (6<<12) | (3<<16) | (4<<20), chan->iobase + VIA_PCM_STOPRATE);
-					break;
-			}				
-		}
-		else
-		{
-			/*
-			 *	New style, turn off channel volume
-			 *	control, set bits in the right register
-			 */	
-			outb(0x0, chan->iobase + VIA_PCM_LEFTVOL);
-			outb(0x0, chan->iobase + VIA_PCM_RIGHTVOL);
-
-			m = inl(chan->iobase + VIA_PCM_STOPRATE);
-			m &= ~(3<<20);
-			if(chan->pcm_fmt & VIA_PCM_FMT_STEREO)
-				m |= (1 << 20);
-			if(chan->pcm_fmt & VIA_PCM_FMT_16BIT)
-				m |= (1 << 21);
-			outl(m, chan->iobase + VIA_PCM_STOPRATE);
-		}		
-	}
-	else
-		outb (chan->pcm_fmt, chan->iobase + VIA_PCM_TYPE);
-
-
-	DPRINTK ("EXIT, pcm_fmt = 0x%02X, reg = 0x%02X\n",
-		 chan->pcm_fmt,
-		 inb (chan->iobase + VIA_PCM_TYPE));
-}
-
-
-/**
- *	via_chan_clear - Stop DMA channel operation, and reset pointers
- *	@card: the chip to accessed
- *	@chan: Channel to be cleared
- *
- *	Call via_chan_stop to halt DMA operations, and then resets
- *	all software pointers which track DMA operation.
- */
-
-static void via_chan_clear (struct via_info *card, struct via_channel *chan)
-{
-	DPRINTK ("ENTER\n");
-	via_chan_stop (chan->iobase);
-	via_chan_buffer_free(card, chan);
-	chan->is_active = 0;
-	chan->is_mapped = 0;
-	chan->is_enabled = 1;
-	chan->slop_len = 0;
-	chan->sw_ptr = 0;
-	chan->n_irqs = 0;
-	atomic_set (&chan->hw_ptr, 0);
-	DPRINTK ("EXIT\n");
-}
-
-
-/**
- *	via_chan_set_speed - Set PCM sample rate for given channel
- *	@card: Private info for specified board
- *	@chan: Channel whose sample rate will be adjusted
- *	@val: New sample rate, in Khz
- *
- *	Helper function for the %SNDCTL_DSP_SPEED ioctl.  OSS semantics
- *	demand that all audio operations halt (if they are not already
- *	halted) when the %SNDCTL_DSP_SPEED is given.
- *
- *	This function halts all audio operations for the given channel
- *	@chan, and then calls via_set_rate to set the audio hardware
- *	to the new rate.
- */
-
-static int via_chan_set_speed (struct via_info *card,
-			       struct via_channel *chan, int val)
-{
-	DPRINTK ("ENTER, requested rate = %d\n", val);
-
-	via_chan_clear (card, chan);
-
-	val = via_set_rate (card->ac97, chan, val);
-
-	DPRINTK ("EXIT, returning %d\n", val);
-	return val;
-}
-
-
-/**
- *	via_chan_set_fmt - Set PCM sample size for given channel
- *	@card: Private info for specified board
- *	@chan: Channel whose sample size will be adjusted
- *	@val: New sample size, use the %AFMT_xxx constants
- *
- *	Helper function for the %SNDCTL_DSP_SETFMT ioctl.  OSS semantics
- *	demand that all audio operations halt (if they are not already
- *	halted) when the %SNDCTL_DSP_SETFMT is given.
- *
- *	This function halts all audio operations for the given channel
- *	@chan, and then calls via_chan_pcm_fmt to set the audio hardware
- *	to the new sample size, either 8-bit or 16-bit.
- */
-
-static int via_chan_set_fmt (struct via_info *card,
-			     struct via_channel *chan, int val)
-{
-	DPRINTK ("ENTER, val=%s\n",
-		 val == AFMT_U8 ? "AFMT_U8" :
-	 	 val == AFMT_S16_LE ? "AFMT_S16_LE" :
-		 "unknown");
-
-	via_chan_clear (card, chan);
-
-	assert (val != AFMT_QUERY); /* this case is handled elsewhere */
-
-	switch (val) {
-	case AFMT_S16_LE:
-		if ((chan->pcm_fmt & VIA_PCM_FMT_16BIT) == 0) {
-			chan->pcm_fmt |= VIA_PCM_FMT_16BIT;
-			via_chan_pcm_fmt (chan, 0);
-		}
-		break;
-
-	case AFMT_U8:
-		if (chan->pcm_fmt & VIA_PCM_FMT_16BIT) {
-			chan->pcm_fmt &= ~VIA_PCM_FMT_16BIT;
-			via_chan_pcm_fmt (chan, 0);
-		}
-		break;
-
-	default:
-		DPRINTK ("unknown AFMT: 0x%X\n", val);
-		val = AFMT_S16_LE;
-	}
-
-	DPRINTK ("EXIT\n");
-	return val;
-}
-
-
-/**
- *	via_chan_set_stereo - Enable or disable stereo for a DMA channel
- *	@card: Private info for specified board
- *	@chan: Channel whose stereo setting will be adjusted
- *	@val: New sample size, use the %AFMT_xxx constants
- *
- *	Helper function for the %SNDCTL_DSP_CHANNELS and %SNDCTL_DSP_STEREO ioctls.  OSS semantics
- *	demand that all audio operations halt (if they are not already
- *	halted) when %SNDCTL_DSP_CHANNELS or SNDCTL_DSP_STEREO is given.
- *
- *	This function halts all audio operations for the given channel
- *	@chan, and then calls via_chan_pcm_fmt to set the audio hardware
- *	to enable or disable stereo.
- */
-
-static int via_chan_set_stereo (struct via_info *card,
-			        struct via_channel *chan, int val)
-{
-	DPRINTK ("ENTER, channels = %d\n", val);
-
-	via_chan_clear (card, chan);
-
-	switch (val) {
-
-	/* mono */
-	case 1:
-		chan->pcm_fmt &= ~VIA_PCM_FMT_STEREO;
-		chan->channels = 1;
-		via_chan_pcm_fmt (chan, 0);
-		break;
-
-	/* stereo */
-	case 2:
-		chan->pcm_fmt |= VIA_PCM_FMT_STEREO;
-		chan->channels = 2;
-		via_chan_pcm_fmt (chan, 0);
-		break;
-
-	case 4:
-	case 6:
-		if(chan->is_multi)
-		{
-			chan->pcm_fmt |= VIA_PCM_FMT_STEREO;
-			chan->channels = val;
-			break;
-		}
-	/* unknown */
-	default:
-		val = -EINVAL;
-		break;
-	}
-
-	DPRINTK ("EXIT, returning %d\n", val);
-	return val;
-}
-
-static int via_chan_set_buffering (struct via_info *card,
-                                struct via_channel *chan, int val)
-{
-	int shift;
-
-        DPRINTK ("ENTER\n");
-
-	/* in both cases the buffer cannot be changed */
-	if (chan->is_active || chan->is_mapped) {
-		DPRINTK ("EXIT\n");
-		return -EINVAL;
-	}
-
-	/* called outside SETFRAGMENT */
-	/* set defaults or do nothing */
-	if (val < 0) {
-
-		if (chan->frag_size && chan->frag_number)
-			goto out;
-
-		DPRINTK ("\n");
-
-		chan->frag_size = (VIA_DEFAULT_FRAG_TIME * chan->rate * chan->channels
-				   * ((chan->pcm_fmt & VIA_PCM_FMT_16BIT) ? 2 : 1)) / 1000 - 1;
-
-		shift = 0;
-		while (chan->frag_size) {
-			chan->frag_size >>= 1;
-			shift++;
-		}
-		chan->frag_size = 1 << shift;
-
-		chan->frag_number = (VIA_DEFAULT_BUFFER_TIME / VIA_DEFAULT_FRAG_TIME);
-
-		DPRINTK ("setting default values %d %d\n", chan->frag_size, chan->frag_number);
-	} else {
-		chan->frag_size = 1 << (val & 0xFFFF);
-		chan->frag_number = (val >> 16) & 0xFFFF;
-
-		DPRINTK ("using user values %d %d\n", chan->frag_size, chan->frag_number);
-	}
-
-	/* quake3 wants frag_number to be a power of two */
-	shift = 0;
-	while (chan->frag_number) {
-		chan->frag_number >>= 1;
-		shift++;
-	}
-	chan->frag_number = 1 << shift;
-
-	if (chan->frag_size > VIA_MAX_FRAG_SIZE)
-		chan->frag_size = VIA_MAX_FRAG_SIZE;
-	else if (chan->frag_size < VIA_MIN_FRAG_SIZE)
-		chan->frag_size = VIA_MIN_FRAG_SIZE;
-
-	if (chan->frag_number < VIA_MIN_FRAG_NUMBER)
-                chan->frag_number = VIA_MIN_FRAG_NUMBER;
-        if (chan->frag_number > VIA_MAX_FRAG_NUMBER)
-        	chan->frag_number = VIA_MAX_FRAG_NUMBER;
-
-	if ((chan->frag_number * chan->frag_size) / PAGE_SIZE > VIA_MAX_BUFFER_DMA_PAGES)
-		chan->frag_number = (VIA_MAX_BUFFER_DMA_PAGES * PAGE_SIZE) / chan->frag_size;
-
-out:
-	if (chan->is_record)
-		atomic_set (&chan->n_frags, 0);
-	else
-		atomic_set (&chan->n_frags, chan->frag_number);
-
-	DPRINTK ("EXIT\n");
-
-	return 0;
-}
-
-#ifdef VIA_CHAN_DUMP_BUFS
-/**
- *	via_chan_dump_bufs - Display DMA table contents
- *	@chan: Channel whose DMA table will be displayed
- *
- *	Debugging function which displays the contents of the
- *	scatter-gather DMA table for the given channel @chan.
- */
-
-static void via_chan_dump_bufs (struct via_channel *chan)
-{
-	int i;
-
-	for (i = 0; i < chan->frag_number; i++) {
-		DPRINTK ("#%02d: addr=%x, count=%u, flag=%d, eol=%d\n",
-			 i, chan->sgtable[i].addr,
-			 chan->sgtable[i].count & 0x00FFFFFF,
-			 chan->sgtable[i].count & VIA_FLAG ? 1 : 0,
-			 chan->sgtable[i].count & VIA_EOL ? 1 : 0);
-	}
-	DPRINTK ("buf_in_use = %d, nextbuf = %d\n",
-		 atomic_read (&chan->buf_in_use),
-		 atomic_read (&chan->sw_ptr));
-}
-#endif /* VIA_CHAN_DUMP_BUFS */
-
-
-/**
- *	via_chan_flush_frag - Flush partially-full playback buffer to hardware
- *	@chan: Channel whose DMA table will be flushed
- *
- *	Flushes partially-full playback buffer to hardware.
- */
-
-static void via_chan_flush_frag (struct via_channel *chan)
-{
-	DPRINTK ("ENTER\n");
-
-	assert (chan->slop_len > 0);
-
-	if (chan->sw_ptr == (chan->frag_number - 1))
-		chan->sw_ptr = 0;
-	else
-		chan->sw_ptr++;
-
-	chan->slop_len = 0;
-
-	assert (atomic_read (&chan->n_frags) > 0);
-	atomic_dec (&chan->n_frags);
-
-	DPRINTK ("EXIT\n");
-}
-
-
-
-/**
- *	via_chan_maybe_start - Initiate audio hardware DMA operation
- *	@chan: Channel whose DMA is to be started
- *
- *	Initiate DMA operation, if the DMA engine for the given
- *	channel @chan is not already active.
- */
-
-static inline void via_chan_maybe_start (struct via_channel *chan)
-{
-	assert (chan->is_active == sg_active(chan->iobase));
-
-	DPRINTK ("MAYBE START %s\n", chan->name);
-	if (!chan->is_active && chan->is_enabled) {
-		chan->is_active = 1;
-		sg_begin (chan);
-		DPRINTK ("starting channel %s\n", chan->name);
-	}
-}
-
-
-/****************************************************************
- *
- * Interface to ac97-codec module
- *
- *
- */
-
-/**
- *	via_ac97_wait_idle - Wait until AC97 codec is not busy
- *	@card: Private info for specified board
- *
- *	Sleep until the AC97 codec is no longer busy.
- *	Returns the final value read from the SGD
- *	register being polled.
- */
-
-static u8 via_ac97_wait_idle (struct via_info *card)
-{
-	u8 tmp8;
-	int counter = VIA_COUNTER_LIMIT;
-
-	DPRINTK ("ENTER/EXIT\n");
-
-	assert (card != NULL);
-	assert (card->pdev != NULL);
-
-	do {
-		udelay (15);
-
-		tmp8 = inb (card->baseaddr + 0x83);
-	} while ((tmp8 & VIA_CR83_BUSY) && (counter-- > 0));
-
-	if (tmp8 & VIA_CR83_BUSY)
-		printk (KERN_WARNING PFX "timeout waiting on AC97 codec\n");
-	return tmp8;
-}
-
-
-/**
- *	via_ac97_read_reg - Read AC97 standard register
- *	@codec: Pointer to generic AC97 codec info
- *	@reg: Index of AC97 register to be read
- *
- *	Read the value of a single AC97 codec register,
- *	as defined by the Intel AC97 specification.
- *
- *	Defines the standard AC97 read-register operation
- *	required by the kernel's ac97_codec interface.
- *
- *	Returns the 16-bit value stored in the specified
- *	register.
- */
-
-static u16 via_ac97_read_reg (struct ac97_codec *codec, u8 reg)
-{
-	unsigned long data;
-	struct via_info *card;
-	int counter;
-
-	DPRINTK ("ENTER\n");
-
-	assert (codec != NULL);
-	assert (codec->private_data != NULL);
-
-	card = codec->private_data;
-	
-	spin_lock(&card->ac97_lock);
-
-	/* Every time we write to register 80 we cause a transaction.
-	   The only safe way to clear the valid bit is to write it at
-	   the same time as the command */
-	data = (reg << 16) | VIA_CR80_READ | VIA_CR80_VALID;
-
-	outl (data, card->baseaddr + VIA_BASE0_AC97_CTRL);
-	udelay (20);
-
-	for (counter = VIA_COUNTER_LIMIT; counter > 0; counter--) {
-		udelay (1);
-		if ((((data = inl(card->baseaddr + VIA_BASE0_AC97_CTRL)) &
-		      (VIA_CR80_VALID|VIA_CR80_BUSY)) == VIA_CR80_VALID))
-			goto out;
-	}
-
-	printk (KERN_WARNING PFX "timeout while reading AC97 codec (0x%lX)\n", data);
-	goto err_out;
-
-out:
-	/* Once the valid bit has become set, we must wait a complete AC97
-	   frame before the data has settled. */
-	udelay(25);
-	data = (unsigned long) inl (card->baseaddr + VIA_BASE0_AC97_CTRL);
-
-	outb (0x02, card->baseaddr + 0x83);
-
-	if (((data & 0x007F0000) >> 16) == reg) {
-		DPRINTK ("EXIT, success, data=0x%lx, retval=0x%lx\n",
-			 data, data & 0x0000FFFF);
-		spin_unlock(&card->ac97_lock);
-		return data & 0x0000FFFF;
-	}
-
-	printk (KERN_WARNING "via82cxxx_audio: not our index: reg=0x%x, newreg=0x%lx\n",
-		reg, ((data & 0x007F0000) >> 16));
-
-err_out:
-	spin_unlock(&card->ac97_lock);
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-}
-
-
-/**
- *	via_ac97_write_reg - Write AC97 standard register
- *	@codec: Pointer to generic AC97 codec info
- *	@reg: Index of AC97 register to be written
- *	@value: Value to be written to AC97 register
- *
- *	Write the value of a single AC97 codec register,
- *	as defined by the Intel AC97 specification.
- *
- *	Defines the standard AC97 write-register operation
- *	required by the kernel's ac97_codec interface.
- */
-
-static void via_ac97_write_reg (struct ac97_codec *codec, u8 reg, u16 value)
-{
-	u32 data;
-	struct via_info *card;
-	int counter;
-
-	DPRINTK ("ENTER\n");
-
-	assert (codec != NULL);
-	assert (codec->private_data != NULL);
-
-	card = codec->private_data;
-
-	spin_lock(&card->ac97_lock);
-	
-	data = (reg << 16) + value;
-	outl (data, card->baseaddr + VIA_BASE0_AC97_CTRL);
-	udelay (10);
-
-	for (counter = VIA_COUNTER_LIMIT; counter > 0; counter--) {
-		if ((inb (card->baseaddr + 0x83) & VIA_CR83_BUSY) == 0)
-			goto out;
-
-		udelay (15);
-	}
-
-	printk (KERN_WARNING PFX "timeout after AC97 codec write (0x%X, 0x%X)\n", reg, value);
-
-out:
-	spin_unlock(&card->ac97_lock);
-	DPRINTK ("EXIT\n");
-}
-
-
-static int via_mixer_open (struct inode *inode, struct file *file)
-{
-	int minor = iminor(inode);
-	struct via_info *card;
-	struct pci_dev *pdev = NULL;
-	struct pci_driver *drvr;
-
-	DPRINTK ("ENTER\n");
-
-	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev)) != NULL) {
-		drvr = pci_dev_driver (pdev);
-		if (drvr == &via_driver) {
-			assert (pci_get_drvdata (pdev) != NULL);
-
-			card = pci_get_drvdata (pdev);
-			if (card->ac97->dev_mixer == minor)
-				goto match;
-		}
-	}
-
-	DPRINTK ("EXIT, returning -ENODEV\n");
-	return -ENODEV;
-
-match:
-	pci_dev_put(pdev);
-	file->private_data = card->ac97;
-
-	DPRINTK ("EXIT, returning 0\n");
-	return nonseekable_open(inode, file);
-}
-
-static int via_mixer_ioctl (struct inode *inode, struct file *file, unsigned int cmd,
-			    unsigned long arg)
-{
-	struct ac97_codec *codec = file->private_data;
-	struct via_info *card;
-	int nonblock = (file->f_flags & O_NONBLOCK);
-	int rc;
-
-	DPRINTK ("ENTER\n");
-
-	assert (codec != NULL);
-	card = codec->private_data;
-	assert (card != NULL);
-
-	rc = via_syscall_down (card, nonblock);
-	if (rc) goto out;
-	
-#if 0
-	/*
-	 *	Intercept volume control on 8233 and 8235
-	 */
-	if(card->volume)
-	{
-		switch(cmd)
-		{
-			case SOUND_MIXER_READ_VOLUME:
-				return card->mixer_vol;
-			case SOUND_MIXER_WRITE_VOLUME:
-			{
-				int v;
-				if(get_user(v, (int *)arg))
-				{
-					rc = -EFAULT;
-					goto out;
-				}
-				card->mixer_vol = v;
-			}
-		}
-	}		
-#endif
-	rc = codec->mixer_ioctl(codec, cmd, arg);
-
-	mutex_unlock(&card->syscall_mutex);
-
-out:
-	DPRINTK ("EXIT, returning %d\n", rc);
-	return rc;
-}
-
-
-static const struct file_operations via_mixer_fops = {
-	.owner		= THIS_MODULE,
-	.open		= via_mixer_open,
-	.llseek		= no_llseek,
-	.ioctl		= via_mixer_ioctl,
-};
-
-
-static int __devinit via_ac97_reset (struct via_info *card)
-{
-	struct pci_dev *pdev = card->pdev;
-	u8 tmp8;
-	u16 tmp16;
-
-	DPRINTK ("ENTER\n");
-
-	assert (pdev != NULL);
-
-#ifndef NDEBUG
-	{
-		u8 r40,r41,r42,r43,r44,r48;
-		pci_read_config_byte (card->pdev, 0x40, &r40);
-		pci_read_config_byte (card->pdev, 0x41, &r41);
-		pci_read_config_byte (card->pdev, 0x42, &r42);
-		pci_read_config_byte (card->pdev, 0x43, &r43);
-		pci_read_config_byte (card->pdev, 0x44, &r44);
-		pci_read_config_byte (card->pdev, 0x48, &r48);
-		DPRINTK ("PCI config: %02X %02X %02X %02X %02X %02X\n",
-			r40,r41,r42,r43,r44,r48);
-
-		spin_lock_irq (&card->lock);
-		DPRINTK ("regs==%02X %02X %02X %08X %08X %08X %08X\n",
-			 inb (card->baseaddr + 0x00),
-			 inb (card->baseaddr + 0x01),
-			 inb (card->baseaddr + 0x02),
-			 inl (card->baseaddr + 0x04),
-			 inl (card->baseaddr + 0x0C),
-			 inl (card->baseaddr + 0x80),
-			 inl (card->baseaddr + 0x84));
-		spin_unlock_irq (&card->lock);
-
-	}
-#endif
-
-        /*
-         * Reset AC97 controller: enable, disable, enable,
-         * pausing after each command for good luck.  Only
-	 * do this if the codec is not ready, because it causes
-	 * loud pops and such due to such a hard codec reset.
-         */
-	pci_read_config_byte (pdev, VIA_ACLINK_STATUS, &tmp8);
-	if ((tmp8 & VIA_CR40_AC97_READY) == 0) {
-        	pci_write_config_byte (pdev, VIA_ACLINK_CTRL,
-				       VIA_CR41_AC97_ENABLE |
-                		       VIA_CR41_AC97_RESET |
-				       VIA_CR41_AC97_WAKEUP);
-        	udelay (100);
-
-        	pci_write_config_byte (pdev, VIA_ACLINK_CTRL, 0);
-        	udelay (100);
-
-        	pci_write_config_byte (pdev, VIA_ACLINK_CTRL,
-				       VIA_CR41_AC97_ENABLE |
-				       VIA_CR41_PCM_ENABLE |
-                		       VIA_CR41_VRA | VIA_CR41_AC97_RESET);
-        	udelay (100);
-	}
-
-	/* Make sure VRA is enabled, in case we didn't do a
-	 * complete codec reset, above
-	 */
-	pci_read_config_byte (pdev, VIA_ACLINK_CTRL, &tmp8);
-	if (((tmp8 & VIA_CR41_VRA) == 0) ||
-	    ((tmp8 & VIA_CR41_AC97_ENABLE) == 0) ||
-	    ((tmp8 & VIA_CR41_PCM_ENABLE) == 0) ||
-	    ((tmp8 & VIA_CR41_AC97_RESET) == 0)) {
-        	pci_write_config_byte (pdev, VIA_ACLINK_CTRL,
-				       VIA_CR41_AC97_ENABLE |
-				       VIA_CR41_PCM_ENABLE |
-                		       VIA_CR41_VRA | VIA_CR41_AC97_RESET);
-        	udelay (100);
-	}
-
-	if(card->legacy)
-	{
-#if 0 /* this breaks on K7M */
-		/* disable legacy stuff */
-		pci_write_config_byte (pdev, 0x42, 0x00);
-		udelay(10);
-#endif
-
-		/* route FM trap to IRQ, disable FM trap */
-		pci_write_config_byte (pdev, 0x48, 0x05);
-		udelay(10);
-	}
-	
-	/* disable all codec GPI interrupts */
-	outl (0, pci_resource_start (pdev, 0) + 0x8C);
-
-	/* WARNING: this line is magic.  Remove this
-	 * and things break. */
-	/* enable variable rate */
- 	tmp16 = via_ac97_read_reg (card->ac97, AC97_EXTENDED_STATUS);
- 	if ((tmp16 & 1) == 0)
- 		via_ac97_write_reg (card->ac97, AC97_EXTENDED_STATUS, tmp16 | 1);
-
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-}
-
-
-static void via_ac97_codec_wait (struct ac97_codec *codec)
-{
-	assert (codec->private_data != NULL);
-	via_ac97_wait_idle (codec->private_data);
-}
-
-
-static int __devinit via_ac97_init (struct via_info *card)
-{
-	int rc;
-	u16 tmp16;
-
-	DPRINTK ("ENTER\n");
-
-	assert (card != NULL);
-
-	card->ac97 = ac97_alloc_codec();
-	if(card->ac97 == NULL)
-		return -ENOMEM;
-		
-	card->ac97->private_data = card;
-	card->ac97->codec_read = via_ac97_read_reg;
-	card->ac97->codec_write = via_ac97_write_reg;
-	card->ac97->codec_wait = via_ac97_codec_wait;
-
-	card->ac97->dev_mixer = register_sound_mixer (&via_mixer_fops, -1);
-	if (card->ac97->dev_mixer < 0) {
-		printk (KERN_ERR PFX "unable to register AC97 mixer, aborting\n");
-		DPRINTK ("EXIT, returning -EIO\n");
-		ac97_release_codec(card->ac97);
-		return -EIO;
-	}
-
-	rc = via_ac97_reset (card);
-	if (rc) {
-		printk (KERN_ERR PFX "unable to reset AC97 codec, aborting\n");
-		goto err_out;
-	}
-	
-	mdelay(10);
-	
-	if (ac97_probe_codec (card->ac97) == 0) {
-		printk (KERN_ERR PFX "unable to probe AC97 codec, aborting\n");
-		rc = -EIO;
-		goto err_out;
-	}
-
-	/* enable variable rate */
-	tmp16 = via_ac97_read_reg (card->ac97, AC97_EXTENDED_STATUS);
-	via_ac97_write_reg (card->ac97, AC97_EXTENDED_STATUS, tmp16 | 1);
-
- 	/*
- 	 * If we cannot enable VRA, we have a locked-rate codec.
- 	 * We try again to enable VRA before assuming so, however.
- 	 */
- 	tmp16 = via_ac97_read_reg (card->ac97, AC97_EXTENDED_STATUS);
- 	if ((tmp16 & 1) == 0) {
- 		via_ac97_write_reg (card->ac97, AC97_EXTENDED_STATUS, tmp16 | 1);
- 		tmp16 = via_ac97_read_reg (card->ac97, AC97_EXTENDED_STATUS);
- 		if ((tmp16 & 1) == 0) {
- 			card->locked_rate = 1;
- 			printk (KERN_WARNING PFX "Codec rate locked at 48Khz\n");
- 		}
- 	}
-
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-
-err_out:
-	unregister_sound_mixer (card->ac97->dev_mixer);
-	DPRINTK ("EXIT, returning %d\n", rc);
-	ac97_release_codec(card->ac97);
-	return rc;
-}
-
-
-static void via_ac97_cleanup (struct via_info *card)
-{
-	DPRINTK ("ENTER\n");
-
-	assert (card != NULL);
-	assert (card->ac97->dev_mixer >= 0);
-
-	unregister_sound_mixer (card->ac97->dev_mixer);
-	ac97_release_codec(card->ac97);
-
-	DPRINTK ("EXIT\n");
-}
-
-
-
-/****************************************************************
- *
- * Interrupt-related code
- *
- */
-
-/**
- *	via_intr_channel - handle an interrupt for a single channel
- *      @card: unused
- *	@chan: handle interrupt for this channel
- *
- *	This is the "meat" of the interrupt handler,
- *	containing the actions taken each time an interrupt
- *	occurs.  All communication and coordination with
- *	userspace takes place here.
- *
- *	Locking: inside card->lock
- */
-
-static void via_intr_channel (struct via_info *card, struct via_channel *chan)
-{
-	u8 status;
-	int n;
-	
-	/* check pertinent bits of status register for action bits */
-	status = inb (chan->iobase) & (VIA_SGD_FLAG | VIA_SGD_EOL | VIA_SGD_STOPPED);
-	if (!status)
-		return;
-
-	/* acknowledge any flagged bits ASAP */
-	outb (status, chan->iobase);
-
-	if (!chan->sgtable) /* XXX: temporary solution */
-		return;
-
-	/* grab current h/w ptr value */
-	n = atomic_read (&chan->hw_ptr);
-
-	/* sanity check: make sure our h/w ptr doesn't have a weird value */
-	assert (n >= 0);
-	assert (n < chan->frag_number);
-
-	
-	/* reset SGD data structure in memory to reflect a full buffer,
-	 * and advance the h/w ptr, wrapping around to zero if needed
-	 */
-	if (n == (chan->frag_number - 1)) {
-		chan->sgtable[n].count = cpu_to_le32(chan->frag_size | VIA_EOL);
-		atomic_set (&chan->hw_ptr, 0);
-	} else {
-		chan->sgtable[n].count = cpu_to_le32(chan->frag_size | VIA_FLAG);
-		atomic_inc (&chan->hw_ptr);
-	}
-
-	/* accounting crap for SNDCTL_DSP_GETxPTR */
-	chan->n_irqs++;
-	chan->bytes += chan->frag_size;
-	/* FIXME - signed overflow is undefined */
-	if (chan->bytes < 0) /* handle overflow of 31-bit value */
-		chan->bytes = chan->frag_size;
-	/* all following checks only occur when not in mmap(2) mode */
-	if (!chan->is_mapped)
-	{
-		/* If we are recording, then n_frags represents the number
-		 * of fragments waiting to be handled by userspace.
-		 * If we are playback, then n_frags represents the number
-		 * of fragments remaining to be filled by userspace.
-		 * We increment here.  If we reach max number of fragments,
-		 * this indicates an underrun/overrun.  For this case under OSS,
-		 * we stop the record/playback process.
-		 */
-		if (atomic_read (&chan->n_frags) < chan->frag_number)
-			atomic_inc (&chan->n_frags);
-		assert (atomic_read (&chan->n_frags) <= chan->frag_number);
-		if (atomic_read (&chan->n_frags) == chan->frag_number) {
-			chan->is_active = 0;
-			via_chan_stop (chan->iobase);
-		}
-	}
-	/* wake up anyone listening to see when interrupts occur */
-	wake_up_all (&chan->wait);
-
-	DPRINTK ("%s intr, status=0x%02X, hwptr=0x%lX, chan->hw_ptr=%d\n",
-		 chan->name, status, (long) inl (chan->iobase + 0x04),
-		 atomic_read (&chan->hw_ptr));
-
-	DPRINTK ("%s intr, channel n_frags == %d, missed %d\n", chan->name,
-		 atomic_read (&chan->n_frags), missed);
-}
-
-
-static irqreturn_t  via_interrupt(int irq, void *dev_id)
-{
-	struct via_info *card = dev_id;
-	u32 status32;
-
-	/* to minimize interrupt sharing costs, we use the SGD status
-	 * shadow register to check the status of all inputs and
-	 * outputs with a single 32-bit bus read.  If no interrupt
-	 * conditions are flagged, we exit immediately
-	 */
-	status32 = inl (card->baseaddr + VIA_BASE0_SGD_STATUS_SHADOW);
-	if (!(status32 & VIA_INTR_MASK))
-        {
-#ifdef CONFIG_MIDI_VIA82CXXX
-	    	 if (card->midi_devc)
-                    	uart401intr(irq, card->midi_devc);
-#endif
-		return IRQ_HANDLED;
-    	}
-	DPRINTK ("intr, status32 == 0x%08X\n", status32);
-
-	/* synchronize interrupt handling under SMP.  this spinlock
-	 * goes away completely on UP
-	 */
-	spin_lock (&card->lock);
-
-	if (status32 & VIA_INTR_OUT)
-		via_intr_channel (card, &card->ch_out);
-	if (status32 & VIA_INTR_IN)
-		via_intr_channel (card, &card->ch_in);
-	if (status32 & VIA_INTR_FM)
-		via_intr_channel (card, &card->ch_fm);
-
-	spin_unlock (&card->lock);
-	
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t via_new_interrupt(int irq, void *dev_id)
-{
-	struct via_info *card = dev_id;
-	u32 status32;
-
-	/* to minimize interrupt sharing costs, we use the SGD status
-	 * shadow register to check the status of all inputs and
-	 * outputs with a single 32-bit bus read.  If no interrupt
-	 * conditions are flagged, we exit immediately
-	 */
-	status32 = inl (card->baseaddr + VIA_BASE0_SGD_STATUS_SHADOW);
-	if (!(status32 & VIA_NEW_INTR_MASK))
-		return IRQ_NONE;
-	/*
-	 * goes away completely on UP
-	 */
-	spin_lock (&card->lock);
-
-	via_intr_channel (card, &card->ch_out);
-	via_intr_channel (card, &card->ch_in);
-	via_intr_channel (card, &card->ch_fm);
-
-	spin_unlock (&card->lock);
-	return IRQ_HANDLED;
-}
-
-
-/**
- *	via_interrupt_init - Initialize interrupt handling
- *	@card: Private info for specified board
- *
- *	Obtain and reserve IRQ for using in handling audio events.
- *	Also, disable any IRQ-generating resources, to make sure
- *	we don't get interrupts before we want them.
- */
-
-static int via_interrupt_init (struct via_info *card)
-{
-	u8 tmp8;
-
-	DPRINTK ("ENTER\n");
-
-	assert (card != NULL);
-	assert (card->pdev != NULL);
-
-	/* check for sane IRQ number. can this ever happen? */
-	if (card->pdev->irq < 2) {
-		printk (KERN_ERR PFX "insane IRQ %d, aborting\n",
-			card->pdev->irq);
-		DPRINTK ("EXIT, returning -EIO\n");
-		return -EIO;
-	}
-
-	/* VIA requires this is done */
-	pci_write_config_byte(card->pdev, PCI_INTERRUPT_LINE, card->pdev->irq);
-	
-	if(card->legacy)
-	{
-		/* make sure FM irq is not routed to us */
-		pci_read_config_byte (card->pdev, VIA_FM_NMI_CTRL, &tmp8);
-		if ((tmp8 & VIA_CR48_FM_TRAP_TO_NMI) == 0) {
-			tmp8 |= VIA_CR48_FM_TRAP_TO_NMI;
-			pci_write_config_byte (card->pdev, VIA_FM_NMI_CTRL, tmp8);
-		}
-		if (request_irq (card->pdev->irq, via_interrupt, IRQF_SHARED, VIA_MODULE_NAME, card)) {
-			printk (KERN_ERR PFX "unable to obtain IRQ %d, aborting\n",
-				card->pdev->irq);
-			DPRINTK ("EXIT, returning -EBUSY\n");
-			return -EBUSY;
-		}
-	}
-	else 
-	{
-		if (request_irq (card->pdev->irq, via_new_interrupt, IRQF_SHARED, VIA_MODULE_NAME, card)) {
-			printk (KERN_ERR PFX "unable to obtain IRQ %d, aborting\n",
-				card->pdev->irq);
-			DPRINTK ("EXIT, returning -EBUSY\n");
-			return -EBUSY;
-		}
-	}
-
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-}
-
-
-/****************************************************************
- *
- * OSS DSP device
- *
- */
-
-static const struct file_operations via_dsp_fops = {
-	.owner		= THIS_MODULE,
-	.open		= via_dsp_open,
-	.release	= via_dsp_release,
-	.read		= via_dsp_read,
-	.write		= via_dsp_write,
-	.poll		= via_dsp_poll,
-	.llseek		= no_llseek,
-	.ioctl		= via_dsp_ioctl,
-	.mmap		= via_dsp_mmap,
-};
-
-
-static int __devinit via_dsp_init (struct via_info *card)
-{
-	u8 tmp8;
-
-	DPRINTK ("ENTER\n");
-
-	assert (card != NULL);
-
-	if(card->legacy)
-	{
-		/* turn off legacy features, if not already */
-		pci_read_config_byte (card->pdev, VIA_FUNC_ENABLE, &tmp8);
-		if (tmp8 & (VIA_CR42_SB_ENABLE |  VIA_CR42_FM_ENABLE)) {
-			tmp8 &= ~(VIA_CR42_SB_ENABLE | VIA_CR42_FM_ENABLE);
-			pci_write_config_byte (card->pdev, VIA_FUNC_ENABLE, tmp8);
-		}
-	}
-
-	via_stop_everything (card);
-
-	card->dev_dsp = register_sound_dsp (&via_dsp_fops, -1);
-	if (card->dev_dsp < 0) {
-		DPRINTK ("EXIT, returning -ENODEV\n");
-		return -ENODEV;
-	}
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-}
-
-
-static void via_dsp_cleanup (struct via_info *card)
-{
-	DPRINTK ("ENTER\n");
-
-	assert (card != NULL);
-	assert (card->dev_dsp >= 0);
-
-	via_stop_everything (card);
-
-	unregister_sound_dsp (card->dev_dsp);
-
-	DPRINTK ("EXIT\n");
-}
-
-
-static struct page * via_mm_nopage (struct vm_area_struct * vma,
-				    unsigned long address, int *type)
-{
-	struct via_info *card = vma->vm_private_data;
-	struct via_channel *chan = &card->ch_out;
-	unsigned long max_bufs;
-	struct page *dmapage;
-	unsigned long pgoff;
-	int rd, wr;
-
-	DPRINTK ("ENTER, start %lXh, ofs %lXh, pgoff %ld, addr %lXh\n",
-		 vma->vm_start,
-		 address - vma->vm_start,
-		 (address - vma->vm_start) >> PAGE_SHIFT,
-		 address);
-
-        if (address > vma->vm_end) {
-		DPRINTK ("EXIT, returning NOPAGE_SIGBUS\n");
-		return NOPAGE_SIGBUS; /* Disallow mremap */
-	}
-        if (!card) {
-		DPRINTK ("EXIT, returning NOPAGE_SIGBUS\n");
-		return NOPAGE_SIGBUS;	/* Nothing allocated */
-	}
-
-	pgoff = vma->vm_pgoff + ((address - vma->vm_start) >> PAGE_SHIFT);
-	rd = card->ch_in.is_mapped;
-	wr = card->ch_out.is_mapped;
-
-	max_bufs = chan->frag_number;
-	if (rd && wr)
-		max_bufs *= 2;
-	if (pgoff >= max_bufs)
-		return NOPAGE_SIGBUS;
-
-	/* if full-duplex (read+write) and we have two sets of bufs,
-	 * then the playback buffers come first, sez soundcard.c */
-	if (pgoff >= chan->page_number) {
-		pgoff -= chan->page_number;
-		chan = &card->ch_in;
-	} else if (!wr)
-		chan = &card->ch_in;
-
-	assert ((((unsigned long)chan->pgtbl[pgoff].cpuaddr) % PAGE_SIZE) == 0);
-
-	dmapage = virt_to_page (chan->pgtbl[pgoff].cpuaddr);
-	DPRINTK ("EXIT, returning page %p for cpuaddr %lXh\n",
-		 dmapage, (unsigned long) chan->pgtbl[pgoff].cpuaddr);
-	get_page (dmapage);
-	if (type)
-		*type = VM_FAULT_MINOR;
-	return dmapage;
-}
-
-
-#ifndef VM_RESERVED
-static int via_mm_swapout (struct page *page, struct file *filp)
-{
-	return 0;
-}
-#endif /* VM_RESERVED */
-
-
-static struct vm_operations_struct via_mm_ops = {
-	.nopage		= via_mm_nopage,
-
-#ifndef VM_RESERVED
-	.swapout	= via_mm_swapout,
-#endif
-};
-
-
-static int via_dsp_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct via_info *card;
-	int nonblock = (file->f_flags & O_NONBLOCK);
-	int rc = -EINVAL, rd=0, wr=0;
-	unsigned long max_size, size, start, offset;
-
-	assert (file != NULL);
-	assert (vma != NULL);
-	card = file->private_data;
-	assert (card != NULL);
-
-	DPRINTK ("ENTER, start %lXh, size %ld, pgoff %ld\n",
-		 vma->vm_start,
-		 vma->vm_end - vma->vm_start,
-		 vma->vm_pgoff);
-
-	max_size = 0;
-	if (vma->vm_flags & VM_READ) {
-		rd = 1;
-		via_chan_set_buffering(card, &card->ch_in, -1);
-		via_chan_buffer_init (card, &card->ch_in);
-		max_size += card->ch_in.page_number << PAGE_SHIFT;
-	}
-	if (vma->vm_flags & VM_WRITE) {
-		wr = 1;
-		via_chan_set_buffering(card, &card->ch_out, -1);
-		via_chan_buffer_init (card, &card->ch_out);
-		max_size += card->ch_out.page_number << PAGE_SHIFT;
-	}
-
-	start = vma->vm_start;
-	offset = (vma->vm_pgoff << PAGE_SHIFT);
-	size = vma->vm_end - vma->vm_start;
-
-	/* some basic size/offset sanity checks */
-	if (size > max_size)
-		goto out;
-	if (offset > max_size - size)
-		goto out;
-
-	rc = via_syscall_down (card, nonblock);
-	if (rc) goto out;
-
-	vma->vm_ops = &via_mm_ops;
-	vma->vm_private_data = card;
-
-#ifdef VM_RESERVED
-	vma->vm_flags |= VM_RESERVED;
-#endif
-
-	if (rd)
-		card->ch_in.is_mapped = 1;
-	if (wr)
-		card->ch_out.is_mapped = 1;
-
-	mutex_unlock(&card->syscall_mutex);
-	rc = 0;
-
-out:
-	DPRINTK ("EXIT, returning %d\n", rc);
-	return rc;
-}
-
-
-static ssize_t via_dsp_do_read (struct via_info *card,
-				char __user *userbuf, size_t count,
-				int nonblock)
-{
-        DECLARE_WAITQUEUE(wait, current);
-	const char __user *orig_userbuf = userbuf;
-	struct via_channel *chan = &card->ch_in;
-	size_t size;
-	int n, tmp;
-	ssize_t ret = 0;
-
-	/* if SGD has not yet been started, start it */
-	via_chan_maybe_start (chan);
-
-handle_one_block:
-	/* just to be a nice neighbor */
-	/* Thomas Sailer:
-	 * But also to ourselves, release semaphore if we do so */
-	if (need_resched()) {
-		mutex_unlock(&card->syscall_mutex);
-		schedule ();
-		ret = via_syscall_down (card, nonblock);
-		if (ret)
-			goto out;
-	}
-
-	/* grab current channel software pointer.  In the case of
-	 * recording, this is pointing to the next buffer that
-	 * will receive data from the audio hardware.
-	 */
-	n = chan->sw_ptr;
-
-	/* n_frags represents the number of fragments waiting
-	 * to be copied to userland.  sleep until at least
-	 * one buffer has been read from the audio hardware.
-	 */
-	add_wait_queue(&chan->wait, &wait);
-	for (;;) {
-		__set_current_state(TASK_INTERRUPTIBLE);
-		tmp = atomic_read (&chan->n_frags);
-		assert (tmp >= 0);
-		assert (tmp <= chan->frag_number);
-		if (tmp)
-			break;
-		if (nonblock || !chan->is_active) {
-			ret = -EAGAIN;
-			break;
-		}
-
-		mutex_unlock(&card->syscall_mutex);
-
-		DPRINTK ("Sleeping on block %d\n", n);
-		schedule();
-
-		ret = via_syscall_down (card, nonblock);
-		if (ret)
-			break;
-
-		if (signal_pending (current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&chan->wait, &wait);
-	if (ret)
-		goto out;
-
-	/* Now that we have a buffer we can read from, send
-	 * as much as sample data possible to userspace.
-	 */
-	while ((count > 0) && (chan->slop_len < chan->frag_size)) {
-		size_t slop_left = chan->frag_size - chan->slop_len;
-		void *base = chan->pgtbl[n / (PAGE_SIZE / chan->frag_size)].cpuaddr;
-		unsigned ofs = (n % (PAGE_SIZE / chan->frag_size)) * chan->frag_size;
-
-		size = (count < slop_left) ? count : slop_left;
-		if (copy_to_user (userbuf,
-				  base + ofs + chan->slop_len,
-				  size)) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		count -= size;
-		chan->slop_len += size;
-		userbuf += size;
-	}
-
-	/* If we didn't copy the buffer completely to userspace,
-	 * stop now.
-	 */
-	if (chan->slop_len < chan->frag_size)
-		goto out;
-
-	/*
-	 * If we get to this point, we copied one buffer completely
-	 * to userspace, give the buffer back to the hardware.
-	 */
-
-	/* advance channel software pointer to point to
-	 * the next buffer from which we will copy
-	 */
-	if (chan->sw_ptr == (chan->frag_number - 1))
-		chan->sw_ptr = 0;
-	else
-		chan->sw_ptr++;
-
-	/* mark one less buffer waiting to be processed */
-	assert (atomic_read (&chan->n_frags) > 0);
-	atomic_dec (&chan->n_frags);
-
-	/* we are at a block boundary, there is no fragment data */
-	chan->slop_len = 0;
-
-	DPRINTK ("Flushed block %u, sw_ptr now %u, n_frags now %d\n",
-		n, chan->sw_ptr, atomic_read (&chan->n_frags));
-
-	DPRINTK ("regs==%02X %02X %02X %08X %08X %08X %08X\n",
-		 inb (card->baseaddr + 0x00),
-		 inb (card->baseaddr + 0x01),
-		 inb (card->baseaddr + 0x02),
-		 inl (card->baseaddr + 0x04),
-		 inl (card->baseaddr + 0x0C),
-		 inl (card->baseaddr + 0x80),
-		 inl (card->baseaddr + 0x84));
-
-	if (count > 0)
-		goto handle_one_block;
-
-out:
-	return (userbuf != orig_userbuf) ? (userbuf - orig_userbuf) : ret;
-}
-
-
-static ssize_t via_dsp_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
-{
-	struct via_info *card;
-	int nonblock = (file->f_flags & O_NONBLOCK);
-	int rc;
-
-	DPRINTK ("ENTER, file=%p, buffer=%p, count=%u, ppos=%lu\n",
-		 file, buffer, count, ppos ? ((unsigned long)*ppos) : 0);
-
-	assert (file != NULL);
-	card = file->private_data;
-	assert (card != NULL);
-
-	rc = via_syscall_down (card, nonblock);
-	if (rc) goto out;
-
-	if (card->ch_in.is_mapped) {
-		rc = -ENXIO;
-		goto out_up;
-	}
-
-	via_chan_set_buffering(card, &card->ch_in, -1);
-        rc = via_chan_buffer_init (card, &card->ch_in);
-
-	if (rc)
-		goto out_up;
-
-	rc = via_dsp_do_read (card, buffer, count, nonblock);
-
-out_up:
-	mutex_unlock(&card->syscall_mutex);
-out:
-	DPRINTK ("EXIT, returning %ld\n",(long) rc);
-	return rc;
-}
-
-
-static ssize_t via_dsp_do_write (struct via_info *card,
-				 const char __user *userbuf, size_t count,
-				 int nonblock)
-{
-        DECLARE_WAITQUEUE(wait, current);
-	const char __user *orig_userbuf = userbuf;
-	struct via_channel *chan = &card->ch_out;
-	volatile struct via_sgd_table *sgtable = chan->sgtable;
-	size_t size;
-	int n, tmp;
-	ssize_t ret = 0;
-
-handle_one_block:
-	/* just to be a nice neighbor */
-	/* Thomas Sailer:
-	 * But also to ourselves, release semaphore if we do so */
-	if (need_resched()) {
-		mutex_unlock(&card->syscall_mutex);
-		schedule ();
-		ret = via_syscall_down (card, nonblock);
-		if (ret)
-			goto out;
-	}
-
-	/* grab current channel fragment pointer.  In the case of
-	 * playback, this is pointing to the next fragment that
-	 * should receive data from userland.
-	 */
-	n = chan->sw_ptr;
-
-	/* n_frags represents the number of fragments remaining
-	 * to be filled by userspace.  Sleep until
-	 * at least one fragment is available for our use.
-	 */
-	add_wait_queue(&chan->wait, &wait);
-	for (;;) {
-		__set_current_state(TASK_INTERRUPTIBLE);
-		tmp = atomic_read (&chan->n_frags);
-		assert (tmp >= 0);
-		assert (tmp <= chan->frag_number);
-		if (tmp)
-			break;
-		if (nonblock || !chan->is_active) {
-			ret = -EAGAIN;
-			break;
-		}
-
-		mutex_unlock(&card->syscall_mutex);
-
-		DPRINTK ("Sleeping on page %d, tmp==%d, ir==%d\n", n, tmp, chan->is_record);
-		schedule();
-
-		ret = via_syscall_down (card, nonblock);
-		if (ret)
-			break;
-
-		if (signal_pending (current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&chan->wait, &wait);
-	if (ret)
-		goto out;
-
-	/* Now that we have at least one fragment we can write to, fill the buffer
-	 * as much as possible with data from userspace.
-	 */
-	while ((count > 0) && (chan->slop_len < chan->frag_size)) {
-		size_t slop_left = chan->frag_size - chan->slop_len;
-
-		size = (count < slop_left) ? count : slop_left;
-		if (copy_from_user (chan->pgtbl[n / (PAGE_SIZE / chan->frag_size)].cpuaddr + (n % (PAGE_SIZE / chan->frag_size)) * chan->frag_size + chan->slop_len,
-				    userbuf, size)) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		count -= size;
-		chan->slop_len += size;
-		userbuf += size;
-	}
-
-	/* If we didn't fill up the buffer with data, stop now.
-         * Put a 'stop' marker in the DMA table too, to tell the
-         * audio hardware to stop if it gets here.
-         */
-	if (chan->slop_len < chan->frag_size) {
-		sgtable[n].count = cpu_to_le32 (chan->slop_len | VIA_EOL | VIA_STOP);
-		goto out;
-	}
-
-	/*
-         * If we get to this point, we have filled a buffer with
-         * audio data, flush the buffer to audio hardware.
-         */
-
-	/* Record the true size for the audio hardware to notice */
-        if (n == (chan->frag_number - 1))
-                sgtable[n].count = cpu_to_le32 (chan->frag_size | VIA_EOL);
-        else
-                sgtable[n].count = cpu_to_le32 (chan->frag_size | VIA_FLAG);
-
-	/* advance channel software pointer to point to
-	 * the next buffer we will fill with data
-	 */
-	if (chan->sw_ptr == (chan->frag_number - 1))
-		chan->sw_ptr = 0;
-	else
-		chan->sw_ptr++;
-
-	/* mark one less buffer as being available for userspace consumption */
-	assert (atomic_read (&chan->n_frags) > 0);
-	atomic_dec (&chan->n_frags);
-
-	/* we are at a block boundary, there is no fragment data */
-	chan->slop_len = 0;
-
-	/* if SGD has not yet been started, start it */
-	via_chan_maybe_start (chan);
-
-	DPRINTK ("Flushed block %u, sw_ptr now %u, n_frags now %d\n",
-		n, chan->sw_ptr, atomic_read (&chan->n_frags));
-
-	DPRINTK ("regs==S=%02X C=%02X TP=%02X BP=%08X RT=%08X SG=%08X CC=%08X SS=%08X\n",
-		 inb (card->baseaddr + 0x00),
-		 inb (card->baseaddr + 0x01),
-		 inb (card->baseaddr + 0x02),
-		 inl (card->baseaddr + 0x04),
-		 inl (card->baseaddr + 0x08),
-		 inl (card->baseaddr + 0x0C),
-		 inl (card->baseaddr + 0x80),
-		 inl (card->baseaddr + 0x84));
-
-	if (count > 0)
-		goto handle_one_block;
-
-out:
-	if (userbuf - orig_userbuf)
-		return userbuf - orig_userbuf;
-	else
-		return ret;
-}
-
-
-static ssize_t via_dsp_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
-{
-	struct via_info *card;
-	ssize_t rc;
-	int nonblock = (file->f_flags & O_NONBLOCK);
-
-	DPRINTK ("ENTER, file=%p, buffer=%p, count=%u, ppos=%lu\n",
-		 file, buffer, count, ppos ? ((unsigned long)*ppos) : 0);
-
-	assert (file != NULL);
-	card = file->private_data;
-	assert (card != NULL);
-
-	rc = via_syscall_down (card, nonblock);
-	if (rc) goto out;
-
-	if (card->ch_out.is_mapped) {
-		rc = -ENXIO;
-		goto out_up;
-	}
-
-	via_chan_set_buffering(card, &card->ch_out, -1);
-	rc = via_chan_buffer_init (card, &card->ch_out);
-
-	if (rc)
-		goto out_up;
-
-	rc = via_dsp_do_write (card, buffer, count, nonblock);
-
-out_up:
-	mutex_unlock(&card->syscall_mutex);
-out:
-	DPRINTK ("EXIT, returning %ld\n",(long) rc);
-	return rc;
-}
-
-
-static unsigned int via_dsp_poll(struct file *file, struct poll_table_struct *wait)
-{
-	struct via_info *card;
-	struct via_channel *chan;
-	unsigned int mask = 0;
-
-	DPRINTK ("ENTER\n");
-
-	assert (file != NULL);
-	card = file->private_data;
-	assert (card != NULL);
-
-	if (file->f_mode & FMODE_READ) {
-		chan = &card->ch_in;
-		if (sg_active (chan->iobase))
-	                poll_wait(file, &chan->wait, wait);
-		if (atomic_read (&chan->n_frags) > 0)
-			mask |= POLLIN | POLLRDNORM;
-	}
-
-	if (file->f_mode & FMODE_WRITE) {
-		chan = &card->ch_out;
-		if (sg_active (chan->iobase))
-	                poll_wait(file, &chan->wait, wait);
-		if (atomic_read (&chan->n_frags) > 0)
-			mask |= POLLOUT | POLLWRNORM;
-	}
-
-	DPRINTK ("EXIT, returning %u\n", mask);
-	return mask;
-}
-
-
-/**
- *	via_dsp_drain_playback - sleep until all playback samples are flushed
- *	@card: Private info for specified board
- *	@chan: Channel to drain
- *	@nonblock: boolean, non-zero if O_NONBLOCK is set
- *
- *	Sleeps until all playback has been flushed to the audio
- *	hardware.
- *
- *	Locking: inside card->syscall_mutex
- */
-
-static int via_dsp_drain_playback (struct via_info *card,
-				   struct via_channel *chan, int nonblock)
-{
-        DECLARE_WAITQUEUE(wait, current);
-	int ret = 0;
-
-	DPRINTK ("ENTER, nonblock = %d\n", nonblock);
-
-	if (chan->slop_len > 0)
-		via_chan_flush_frag (chan);
-
-	if (atomic_read (&chan->n_frags) == chan->frag_number)
-		goto out;
-
-	via_chan_maybe_start (chan);
-
-	add_wait_queue(&chan->wait, &wait);
-	for (;;) {
-		DPRINTK ("FRAGS %d FRAGNUM %d\n", atomic_read(&chan->n_frags), chan->frag_number);
-		__set_current_state(TASK_INTERRUPTIBLE);
-		if (atomic_read (&chan->n_frags) >= chan->frag_number)
-			break;
-
-		if (nonblock) {
-			DPRINTK ("EXIT, returning -EAGAIN\n");
-			ret = -EAGAIN;
-			break;
-		}
-
-#ifdef VIA_DEBUG
-		{
-		u8 r40,r41,r42,r43,r44,r48;
-		pci_read_config_byte (card->pdev, 0x40, &r40);
-		pci_read_config_byte (card->pdev, 0x41, &r41);
-		pci_read_config_byte (card->pdev, 0x42, &r42);
-		pci_read_config_byte (card->pdev, 0x43, &r43);
-		pci_read_config_byte (card->pdev, 0x44, &r44);
-		pci_read_config_byte (card->pdev, 0x48, &r48);
-		DPRINTK ("PCI config: %02X %02X %02X %02X %02X %02X\n",
-			r40,r41,r42,r43,r44,r48);
-
-		DPRINTK ("regs==%02X %02X %02X %08X %08X %08X %08X\n",
-			 inb (card->baseaddr + 0x00),
-			 inb (card->baseaddr + 0x01),
-			 inb (card->baseaddr + 0x02),
-			 inl (card->baseaddr + 0x04),
-			 inl (card->baseaddr + 0x0C),
-			 inl (card->baseaddr + 0x80),
-			 inl (card->baseaddr + 0x84));
-		}
-
-		if (!chan->is_active)
-			printk (KERN_ERR "sleeping but not active\n");
-#endif
-
-		mutex_unlock(&card->syscall_mutex);
-
-		DPRINTK ("sleeping, nbufs=%d\n", atomic_read (&chan->n_frags));
-		schedule();
-
-		if ((ret = via_syscall_down (card, nonblock)))
-			break;
-
-		if (signal_pending (current)) {
-			DPRINTK ("EXIT, returning -ERESTARTSYS\n");
-			ret = -ERESTARTSYS;
-			break;
-		}
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&chan->wait, &wait);
-
-#ifdef VIA_DEBUG
-	{
-		u8 r40,r41,r42,r43,r44,r48;
-		pci_read_config_byte (card->pdev, 0x40, &r40);
-		pci_read_config_byte (card->pdev, 0x41, &r41);
-		pci_read_config_byte (card->pdev, 0x42, &r42);
-		pci_read_config_byte (card->pdev, 0x43, &r43);
-		pci_read_config_byte (card->pdev, 0x44, &r44);
-		pci_read_config_byte (card->pdev, 0x48, &r48);
-		DPRINTK ("PCI config: %02X %02X %02X %02X %02X %02X\n",
-			r40,r41,r42,r43,r44,r48);
-
-		DPRINTK ("regs==%02X %02X %02X %08X %08X %08X %08X\n",
-			 inb (card->baseaddr + 0x00),
-			 inb (card->baseaddr + 0x01),
-			 inb (card->baseaddr + 0x02),
-			 inl (card->baseaddr + 0x04),
-			 inl (card->baseaddr + 0x0C),
-			 inl (card->baseaddr + 0x80),
-			 inl (card->baseaddr + 0x84));
-
-		DPRINTK ("final nbufs=%d\n", atomic_read (&chan->n_frags));
-	}
-#endif
-
-out:
-	DPRINTK ("EXIT, returning %d\n", ret);
-	return ret;
-}
-
-
-/**
- *	via_dsp_ioctl_space - get information about channel buffering
- *	@card: Private info for specified board
- *	@chan: pointer to channel-specific info
- *	@arg: user buffer for returned information
- *
- *	Handles SNDCTL_DSP_GETISPACE and SNDCTL_DSP_GETOSPACE.
- *
- *	Locking: inside card->syscall_mutex
- */
-
-static int via_dsp_ioctl_space (struct via_info *card,
-				struct via_channel *chan,
-				void __user *arg)
-{
-	audio_buf_info info;
-
-	via_chan_set_buffering(card, chan, -1);
-
-	info.fragstotal = chan->frag_number;
-	info.fragsize = chan->frag_size;
-
-	/* number of full fragments we can read/write without blocking */
-	info.fragments = atomic_read (&chan->n_frags);
-
-	if ((chan->slop_len % chan->frag_size > 0) && (info.fragments > 0))
-		info.fragments--;
-
-	/* number of bytes that can be read or written immediately
-	 * without blocking.
-	 */
-	info.bytes = (info.fragments * chan->frag_size);
-	if (chan->slop_len % chan->frag_size > 0)
-		info.bytes += chan->frag_size - (chan->slop_len % chan->frag_size);
-
-	DPRINTK ("EXIT, returning fragstotal=%d, fragsize=%d, fragments=%d, bytes=%d\n",
-		info.fragstotal,
-		info.fragsize,
-		info.fragments,
-		info.bytes);
-
-	return copy_to_user (arg, &info, sizeof (info))?-EFAULT:0;
-}
-
-
-/**
- *	via_dsp_ioctl_ptr - get information about hardware buffer ptr
- *	@card: Private info for specified board
- *	@chan: pointer to channel-specific info
- *	@arg: user buffer for returned information
- *
- *	Handles SNDCTL_DSP_GETIPTR and SNDCTL_DSP_GETOPTR.
- *
- *	Locking: inside card->syscall_mutex
- */
-
-static int via_dsp_ioctl_ptr (struct via_info *card,
-				struct via_channel *chan,
-				void __user *arg)
-{
-	count_info info;
-
-	spin_lock_irq (&card->lock);
-
-	info.bytes = chan->bytes;
-	info.blocks = chan->n_irqs;
-	chan->n_irqs = 0;
-
-	spin_unlock_irq (&card->lock);
-
-	if (chan->is_active) {
-		unsigned long extra;
-		info.ptr = atomic_read (&chan->hw_ptr) * chan->frag_size;
-		extra = chan->frag_size - via_sg_offset(chan);
-		info.ptr += extra;
-		info.bytes += extra;
-	} else {
-		info.ptr = 0;
-	}
-
-	DPRINTK ("EXIT, returning bytes=%d, blocks=%d, ptr=%d\n",
-		info.bytes,
-		info.blocks,
-		info.ptr);
-
-	return copy_to_user (arg, &info, sizeof (info))?-EFAULT:0;
-}
-
-
-static int via_dsp_ioctl_trigger (struct via_channel *chan, int val)
-{
-	int enable, do_something;
-
-	if (chan->is_record)
-		enable = (val & PCM_ENABLE_INPUT);
-	else
-		enable = (val & PCM_ENABLE_OUTPUT);
-
-	if (!chan->is_enabled && enable) {
-		do_something = 1;
-	} else if (chan->is_enabled && !enable) {
-		do_something = -1;
-	} else {
-		do_something = 0;
-	}
-
-	DPRINTK ("enable=%d, do_something=%d\n",
-		 enable, do_something);
-
-	if (chan->is_active && do_something)
-		return -EINVAL;
-
-	if (do_something == 1) {
-		chan->is_enabled = 1;
-		via_chan_maybe_start (chan);
-		DPRINTK ("Triggering input\n");
-	}
-
-	else if (do_something == -1) {
-		chan->is_enabled = 0;
-		DPRINTK ("Setup input trigger\n");
-	}
-
-	return 0;
-}
-
-
-static int via_dsp_ioctl (struct inode *inode, struct file *file,
-			  unsigned int cmd, unsigned long arg)
-{
-	int rc, rd=0, wr=0, val=0;
-	struct via_info *card;
-	struct via_channel *chan;
-	int nonblock = (file->f_flags & O_NONBLOCK);
-	int __user *ip = (int __user *)arg;
-	void __user *p = (void __user *)arg;
-
-	assert (file != NULL);
-	card = file->private_data;
-	assert (card != NULL);
-
-	if (file->f_mode & FMODE_WRITE)
-		wr = 1;
-	if (file->f_mode & FMODE_READ)
-		rd = 1;
-
-	rc = via_syscall_down (card, nonblock);
-	if (rc)
-		return rc;
-	rc = -EINVAL;
-
-	switch (cmd) {
-
-	/* OSS API version.  XXX unverified */
-	case OSS_GETVERSION:
-		DPRINTK ("ioctl OSS_GETVERSION, EXIT, returning SOUND_VERSION\n");
-		rc = put_user (SOUND_VERSION, ip);
-		break;
-
-	/* list of supported PCM data formats */
-	case SNDCTL_DSP_GETFMTS:
-		DPRINTK ("DSP_GETFMTS, EXIT, returning AFMT U8|S16_LE\n");
-                rc = put_user (AFMT_U8 | AFMT_S16_LE, ip);
-		break;
-
-	/* query or set current channel's PCM data format */
-	case SNDCTL_DSP_SETFMT:
-		if (get_user(val, ip)) {
-			rc = -EFAULT;
-			break;
-		}
-		DPRINTK ("DSP_SETFMT, val==%d\n", val);
-		if (val != AFMT_QUERY) {
-			rc = 0;
-
-			if (rd)
-				rc = via_chan_set_fmt (card, &card->ch_in, val);
-
-			if (rc >= 0 && wr)
-				rc = via_chan_set_fmt (card, &card->ch_out, val);
-
-			if (rc < 0)
-				break;
-
-			val = rc;
-		} else {
-			if ((rd && (card->ch_in.pcm_fmt & VIA_PCM_FMT_16BIT)) ||
-			    (wr && (card->ch_out.pcm_fmt & VIA_PCM_FMT_16BIT)))
-				val = AFMT_S16_LE;
-			else
-				val = AFMT_U8;
-		}
-		DPRINTK ("SETFMT EXIT, returning %d\n", val);
-                rc = put_user (val, ip);
-		break;
-
-	/* query or set number of channels (1=mono, 2=stereo, 4/6 for multichannel) */
-        case SNDCTL_DSP_CHANNELS:
-		if (get_user(val, ip)) {
-			rc = -EFAULT;
-			break;
-		}
-		DPRINTK ("DSP_CHANNELS, val==%d\n", val);
-		if (val != 0) {
-			rc = 0;
-
-			if (rd)
-				rc = via_chan_set_stereo (card, &card->ch_in, val);
-
-			if (rc >= 0 && wr)
-				rc = via_chan_set_stereo (card, &card->ch_out, val);
-
-			if (rc < 0)
-				break;
-
-			val = rc;
-		} else {
-			if (rd)
-				val = card->ch_in.channels;
-			else
-				val = card->ch_out.channels;
-		}
-		DPRINTK ("CHANNELS EXIT, returning %d\n", val);
-                rc = put_user (val, ip);
-		break;
-
-	/* enable (val is not zero) or disable (val == 0) stereo */
-        case SNDCTL_DSP_STEREO:
-		if (get_user(val, ip)) {
-			rc = -EFAULT;
-			break;
-		}
-		DPRINTK ("DSP_STEREO, val==%d\n", val);
-		rc = 0;
-
-		if (rd)
-			rc = via_chan_set_stereo (card, &card->ch_in, val ? 2 : 1);
-		if (rc >= 0 && wr)
-			rc = via_chan_set_stereo (card, &card->ch_out, val ? 2 : 1);
-
-		if (rc < 0)
-			break;
-
-		val = rc - 1;
-
-		DPRINTK ("STEREO EXIT, returning %d\n", val);
-		rc = put_user(val, ip);
-		break;
-
-	/* query or set sampling rate */
-        case SNDCTL_DSP_SPEED:
-		if (get_user(val, ip)) {
-			rc = -EFAULT;
-			break;
-		}
-		DPRINTK ("DSP_SPEED, val==%d\n", val);
-		if (val < 0) {
-			rc = -EINVAL;
-			break;
-		}
-		if (val > 0) {
-			rc = 0;
-
-			if (rd)
-				rc = via_chan_set_speed (card, &card->ch_in, val);
-			if (rc >= 0 && wr)
-				rc = via_chan_set_speed (card, &card->ch_out, val);
-
-			if (rc < 0)
-				break;
-
-			val = rc;
-		} else {
-			if (rd)
-				val = card->ch_in.rate;
-			else if (wr)
-				val = card->ch_out.rate;
-			else
-				val = 0;
-		}
-		DPRINTK ("SPEED EXIT, returning %d\n", val);
-                rc = put_user (val, ip);
-		break;
-
-	/* wait until all buffers have been played, and then stop device */
-	case SNDCTL_DSP_SYNC:
-		DPRINTK ("DSP_SYNC\n");
-		rc = 0;
-		if (wr) {
-			DPRINTK ("SYNC EXIT (after calling via_dsp_drain_playback)\n");
-			rc = via_dsp_drain_playback (card, &card->ch_out, nonblock);
-		}
-		break;
-
-	/* stop recording/playback immediately */
-        case SNDCTL_DSP_RESET:
-		DPRINTK ("DSP_RESET\n");
-		if (rd) {
-			via_chan_clear (card, &card->ch_in);
-			card->ch_in.frag_number = 0;
-			card->ch_in.frag_size = 0;
-			atomic_set(&card->ch_in.n_frags, 0);
-		}
-
-		if (wr) {
-			via_chan_clear (card, &card->ch_out);
-			card->ch_out.frag_number = 0;
-			card->ch_out.frag_size = 0;
-			atomic_set(&card->ch_out.n_frags, 0);
-		}
-
-		rc = 0;
-		break;
-
-	case SNDCTL_DSP_NONBLOCK:
-		file->f_flags |= O_NONBLOCK;
-		rc = 0;
-		break;
-
-	/* obtain bitmask of device capabilities, such as mmap, full duplex, etc. */
-	case SNDCTL_DSP_GETCAPS:
-		DPRINTK ("DSP_GETCAPS\n");
-		rc = put_user(VIA_DSP_CAP, ip);
-		break;
-
-	/* obtain buffer fragment size */
-	case SNDCTL_DSP_GETBLKSIZE:
-		DPRINTK ("DSP_GETBLKSIZE\n");
-
-		if (rd) {
-			via_chan_set_buffering(card, &card->ch_in, -1);
-			rc = put_user(card->ch_in.frag_size, ip);
-		} else if (wr) {
-			via_chan_set_buffering(card, &card->ch_out, -1);
-			rc = put_user(card->ch_out.frag_size, ip);
-		}
-		break;
-
-	/* obtain information about input buffering */
-	case SNDCTL_DSP_GETISPACE:
-		DPRINTK ("DSP_GETISPACE\n");
-		if (rd)
-			rc = via_dsp_ioctl_space (card, &card->ch_in, p);
-		break;
-
-	/* obtain information about output buffering */
-	case SNDCTL_DSP_GETOSPACE:
-		DPRINTK ("DSP_GETOSPACE\n");
-		if (wr)
-			rc = via_dsp_ioctl_space (card, &card->ch_out, p);
-		break;
-
-	/* obtain information about input hardware pointer */
-	case SNDCTL_DSP_GETIPTR:
-		DPRINTK ("DSP_GETIPTR\n");
-		if (rd)
-			rc = via_dsp_ioctl_ptr (card, &card->ch_in, p);
-		break;
-
-	/* obtain information about output hardware pointer */
-	case SNDCTL_DSP_GETOPTR:
-		DPRINTK ("DSP_GETOPTR\n");
-		if (wr)
-			rc = via_dsp_ioctl_ptr (card, &card->ch_out, p);
-		break;
-
-	/* return number of bytes remaining to be played by DMA engine */
-	case SNDCTL_DSP_GETODELAY:
-		{
-		DPRINTK ("DSP_GETODELAY\n");
-
-		chan = &card->ch_out;
-
-		if (!wr)
-			break;
-
-		if (chan->is_active) {
-
-			val = chan->frag_number - atomic_read (&chan->n_frags);
-
-			assert(val >= 0);
-				
-			if (val > 0) {
-				val *= chan->frag_size;
-				val -= chan->frag_size - via_sg_offset(chan);
-			}
-			val += chan->slop_len % chan->frag_size;
-		} else
-			val = 0;
-
-		assert (val <= (chan->frag_size * chan->frag_number));
-
-		DPRINTK ("GETODELAY EXIT, val = %d bytes\n", val);
-                rc = put_user (val, ip);
-		break;
-		}
-
-	/* handle the quick-start of a channel,
-	 * or the notification that a quick-start will
-	 * occur in the future
-	 */
-	case SNDCTL_DSP_SETTRIGGER:
-		if (get_user(val, ip)) {
-			rc = -EFAULT;
-			break;
-		}
-		DPRINTK ("DSP_SETTRIGGER, rd=%d, wr=%d, act=%d/%d, en=%d/%d\n",
-			rd, wr, card->ch_in.is_active, card->ch_out.is_active,
-			card->ch_in.is_enabled, card->ch_out.is_enabled);
-
-		rc = 0;
-
-		if (rd)
-			rc = via_dsp_ioctl_trigger (&card->ch_in, val);
-
-		if (!rc && wr)
-			rc = via_dsp_ioctl_trigger (&card->ch_out, val);
-
-		break;
-
-	case SNDCTL_DSP_GETTRIGGER:
-		val = 0;
-		if ((file->f_mode & FMODE_READ) && card->ch_in.is_enabled)
-			val |= PCM_ENABLE_INPUT;
-		if ((file->f_mode & FMODE_WRITE) && card->ch_out.is_enabled)
-			val |= PCM_ENABLE_OUTPUT;
-		rc = put_user(val, ip);
-		break;
-
-	/* Enable full duplex.  Since we do this as soon as we are opened
-	 * with O_RDWR, this is mainly a no-op that always returns success.
-	 */
-	case SNDCTL_DSP_SETDUPLEX:
-		DPRINTK ("DSP_SETDUPLEX\n");
-		if (!rd || !wr)
-			break;
-		rc = 0;
-		break;
-
-	/* set fragment size.  implemented as a successful no-op for now */
-	case SNDCTL_DSP_SETFRAGMENT:
-		if (get_user(val, ip)) {
-			rc = -EFAULT;
-			break;
-		}
-		DPRINTK ("DSP_SETFRAGMENT, val==%d\n", val);
-
-		if (rd)
-			rc = via_chan_set_buffering(card, &card->ch_in, val);
-
-		if (wr)
-			rc = via_chan_set_buffering(card, &card->ch_out, val);
-
-		DPRINTK ("SNDCTL_DSP_SETFRAGMENT (fragshift==0x%04X (%d), maxfrags==0x%04X (%d))\n",
-			 val & 0xFFFF,
-			 val & 0xFFFF,
-			 (val >> 16) & 0xFFFF,
-			 (val >> 16) & 0xFFFF);
-
-		rc = 0;
-		break;
-
-	/* inform device of an upcoming pause in input (or output). */
-	case SNDCTL_DSP_POST:
-		DPRINTK ("DSP_POST\n");
-		if (wr) {
-			if (card->ch_out.slop_len > 0)
-				via_chan_flush_frag (&card->ch_out);
-			via_chan_maybe_start (&card->ch_out);
-		}
-
-		rc = 0;
-		break;
-
-	/* not implemented */
-	default:
-		DPRINTK ("unhandled ioctl, cmd==%u, arg==%p\n",
-			 cmd, p);
-		break;
-	}
-
-	mutex_unlock(&card->syscall_mutex);
-	DPRINTK ("EXIT, returning %d\n", rc);
-	return rc;
-}
-
-
-static int via_dsp_open (struct inode *inode, struct file *file)
-{
-	int minor = iminor(inode);
-	struct via_info *card;
-	struct pci_dev *pdev = NULL;
-	struct via_channel *chan;
-	struct pci_driver *drvr;
-	int nonblock = (file->f_flags & O_NONBLOCK);
-
-	DPRINTK ("ENTER, minor=%d, file->f_mode=0x%x\n", minor, file->f_mode);
-
-	if (!(file->f_mode & (FMODE_READ | FMODE_WRITE))) {
-		DPRINTK ("EXIT, returning -EINVAL\n");
-		return -EINVAL;
-	}
-
-	card = NULL;
-	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev)) != NULL) {
-		drvr = pci_dev_driver (pdev);
-		if (drvr == &via_driver) {
-			assert (pci_get_drvdata (pdev) != NULL);
-
-			card = pci_get_drvdata (pdev);
-			DPRINTK ("dev_dsp = %d, minor = %d, assn = %d\n",
-				 card->dev_dsp, minor,
-				 (card->dev_dsp ^ minor) & ~0xf);
-
-			if (((card->dev_dsp ^ minor) & ~0xf) == 0)
-				goto match;
-		}
-	}
-
-	DPRINTK ("no matching %s found\n", card ? "minor" : "driver");
-	return -ENODEV;
-
-match:
-	pci_dev_put(pdev);
-	if (nonblock) {
-		if (!mutex_trylock(&card->open_mutex)) {
-			DPRINTK ("EXIT, returning -EAGAIN\n");
-			return -EAGAIN;
-		}
-	} else {
-		if (mutex_lock_interruptible(&card->open_mutex)) {
-			DPRINTK ("EXIT, returning -ERESTARTSYS\n");
-			return -ERESTARTSYS;
-		}
-	}
-
-	file->private_data = card;
-	DPRINTK ("file->f_mode == 0x%x\n", file->f_mode);
-
-	/* handle input from analog source */
-	if (file->f_mode & FMODE_READ) {
-		chan = &card->ch_in;
-
-		via_chan_init (card, chan);
-
-		/* why is this forced to 16-bit stereo in all drivers? */
-		chan->pcm_fmt = VIA_PCM_FMT_16BIT | VIA_PCM_FMT_STEREO;
-		chan->channels = 2;
-
-		// TO DO - use FIFO: via_capture_fifo(card, 1);
-		via_chan_pcm_fmt (chan, 0);
-		via_set_rate (card->ac97, chan, 44100);
-	}
-
-	/* handle output to analog source */
-	if (file->f_mode & FMODE_WRITE) {
-		chan = &card->ch_out;
-
-		via_chan_init (card, chan);
-
-		if (file->f_mode & FMODE_READ) {
-			/* if in duplex mode make the recording and playback channels
-			   have the same settings */
-			chan->pcm_fmt = VIA_PCM_FMT_16BIT | VIA_PCM_FMT_STEREO;
-			chan->channels = 2;
-			via_chan_pcm_fmt (chan, 0);
-                        via_set_rate (card->ac97, chan, 44100);
-		} else {
-			 if ((minor & 0xf) == SND_DEV_DSP16) {
-				chan->pcm_fmt = VIA_PCM_FMT_16BIT;
-				via_chan_pcm_fmt (chan, 0);
-				via_set_rate (card->ac97, chan, 44100);
-			} else {
-				via_chan_pcm_fmt (chan, 1);
-				via_set_rate (card->ac97, chan, 8000);
-			}
-		}
-	}
-
-	DPRINTK ("EXIT, returning 0\n");
-	return nonseekable_open(inode, file);
-}
-
-
-static int via_dsp_release(struct inode *inode, struct file *file)
-{
-	struct via_info *card;
-	int nonblock = (file->f_flags & O_NONBLOCK);
-	int rc;
-
-	DPRINTK ("ENTER\n");
-
-	assert (file != NULL);
-	card = file->private_data;
-	assert (card != NULL);
-
-	rc = via_syscall_down (card, nonblock);
-	if (rc) {
-		DPRINTK ("EXIT (syscall_down error), rc=%d\n", rc);
-		return rc;
-	}
-
-	if (file->f_mode & FMODE_WRITE) {
-		rc = via_dsp_drain_playback (card, &card->ch_out, nonblock);
-		if (rc && rc != -ERESTARTSYS)	/* Nobody needs to know about ^C */
-			printk (KERN_DEBUG "via_audio: ignoring drain playback error %d\n", rc);
-
-		via_chan_free (card, &card->ch_out);
-		via_chan_buffer_free(card, &card->ch_out);
-	}
-
-	if (file->f_mode & FMODE_READ) {
-		via_chan_free (card, &card->ch_in);
-		via_chan_buffer_free (card, &card->ch_in);
-	}
-
-	mutex_unlock(&card->syscall_mutex);
-	mutex_unlock(&card->open_mutex);
-
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-}
-
-
-/****************************************************************
- *
- * Chip setup and kernel registration
- *
- *
- */
-
-static int __devinit via_init_one (struct pci_dev *pdev, const struct pci_device_id *id)
-{
-#ifdef CONFIG_MIDI_VIA82CXXX
-	u8 r42;
-#endif
-	int rc;
-	struct via_info *card;
-	static int printed_version;
-
-	DPRINTK ("ENTER\n");
-
-	if (printed_version++ == 0)
-		printk (KERN_INFO "Via 686a/8233/8235 audio driver " VIA_VERSION "\n");
-
-	rc = pci_enable_device (pdev);
-	if (rc)
-		goto err_out;
-
-	rc = pci_request_regions (pdev, "via82cxxx_audio");
-	if (rc)
-		goto err_out_disable;
-
-	rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
-	if (rc)
-		goto err_out_res;
-	rc = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
-	if (rc)
-		goto err_out_res;
-
-	card = kmalloc (sizeof (*card), GFP_KERNEL);
-	if (!card) {
-		printk (KERN_ERR PFX "out of memory, aborting\n");
-		rc = -ENOMEM;
-		goto err_out_res;
-	}
-
-	pci_set_drvdata (pdev, card);
-
-	memset (card, 0, sizeof (*card));
-	card->pdev = pdev;
-	card->baseaddr = pci_resource_start (pdev, 0);
-	card->card_num = via_num_cards++;
-	spin_lock_init (&card->lock);
-	spin_lock_init (&card->ac97_lock);
-	mutex_init(&card->syscall_mutex);
-	mutex_init(&card->open_mutex);
-
-	/* we must init these now, in case the intr handler needs them */
-	via_chan_init_defaults (card, &card->ch_out);
-	via_chan_init_defaults (card, &card->ch_in);
-	via_chan_init_defaults (card, &card->ch_fm);
-
-	/* if BAR 2 is present, chip is Rev H or later,
-	 * which means it has a few extra features */
-	if (pci_resource_start (pdev, 2) > 0)
-		card->rev_h = 1;
-		
-	/* Overkill for now, but more flexible done right */
-	
-	card->intmask = id->driver_data;
-	card->legacy = !card->intmask;
-	card->sixchannel = id->driver_data;
-	
-	if(card->sixchannel)
-		printk(KERN_INFO PFX "Six channel audio available\n");
-	if (pdev->irq < 1) {
-		printk (KERN_ERR PFX "invalid PCI IRQ %d, aborting\n", pdev->irq);
-		rc = -ENODEV;
-		goto err_out_kfree;
-	}
-
-	if (!(pci_resource_flags (pdev, 0) & IORESOURCE_IO)) {
-		printk (KERN_ERR PFX "unable to locate I/O resources, aborting\n");
-		rc = -ENODEV;
-		goto err_out_kfree;
-	}
-
-	pci_set_master(pdev);
-	
-	/*
-	 * init AC97 mixer and codec
-	 */
-	rc = via_ac97_init (card);
-	if (rc) {
-		printk (KERN_ERR PFX "AC97 init failed, aborting\n");
-		goto err_out_kfree;
-	}
-
-	/*
-	 * init DSP device
-	 */
-	rc = via_dsp_init (card);
-	if (rc) {
-		printk (KERN_ERR PFX "DSP device init failed, aborting\n");
-		goto err_out_have_mixer;
-	}
-
-	/*
-	 * init and turn on interrupts, as the last thing we do
-	 */
-	rc = via_interrupt_init (card);
-	if (rc) {
-		printk (KERN_ERR PFX "interrupt init failed, aborting\n");
-		goto err_out_have_dsp;
-	}
-
-	printk (KERN_INFO PFX "board #%d at 0x%04lX, IRQ %d\n",
-		card->card_num + 1, card->baseaddr, pdev->irq);
-
-#ifdef CONFIG_MIDI_VIA82CXXX
-	/* Disable by default */
-	card->midi_info.io_base = 0;
-
-	if(card->legacy)
-	{
-		pci_read_config_byte (pdev, 0x42, &r42);
-		/* Disable MIDI interrupt */
-		pci_write_config_byte (pdev, 0x42, r42 | VIA_CR42_MIDI_IRQMASK);
-		if (r42 & VIA_CR42_MIDI_ENABLE)
-		{
-			if (r42 & VIA_CR42_MIDI_PNP) /* Address selected by iobase 2 - not tested */
-				card->midi_info.io_base = pci_resource_start (pdev, 2);
-			else /* Address selected by byte 0x43 */
-			{
-				u8 r43;
-				pci_read_config_byte (pdev, 0x43, &r43);
-				card->midi_info.io_base = 0x300 + ((r43 & 0x0c) << 2);
-			}
-
-			card->midi_info.irq = -pdev->irq;
-			if (probe_uart401(& card->midi_info, THIS_MODULE))
-			{
-				card->midi_devc=midi_devs[card->midi_info.slots[4]]->devc;
-				pci_write_config_byte(pdev, 0x42, r42 & ~VIA_CR42_MIDI_IRQMASK);
-				printk("Enabled Via MIDI\n");
-			}
-		}
-	}
-#endif
-
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-
-err_out_have_dsp:
-	via_dsp_cleanup (card);
-
-err_out_have_mixer:
-	via_ac97_cleanup (card);
-
-err_out_kfree:
-#ifndef VIA_NDEBUG
-	memset (card, OSS_POISON_FREE, sizeof (*card)); /* poison memory */
-#endif
-	kfree (card);
-
-err_out_res:
-	pci_release_regions (pdev);
-
-err_out_disable:
-	pci_disable_device (pdev);
-
-err_out:
-	pci_set_drvdata (pdev, NULL);
-	DPRINTK ("EXIT - returning %d\n", rc);
-	return rc;
-}
-
-
-static void __devexit via_remove_one (struct pci_dev *pdev)
-{
-	struct via_info *card;
-
-	DPRINTK ("ENTER\n");
-
-	assert (pdev != NULL);
-	card = pci_get_drvdata (pdev);
-	assert (card != NULL);
-
-#ifdef CONFIG_MIDI_VIA82CXXX
-	if (card->midi_info.io_base)
-		unload_uart401(&card->midi_info);
-#endif
-
-	free_irq (card->pdev->irq, card);
-	via_dsp_cleanup (card);
-	via_ac97_cleanup (card);
-
-#ifndef VIA_NDEBUG
-	memset (card, OSS_POISON_FREE, sizeof (*card)); /* poison memory */
-#endif
-	kfree (card);
-
-	pci_set_drvdata (pdev, NULL);
-
-	pci_release_regions (pdev);
-	pci_disable_device (pdev);
-	pci_set_power_state (pdev, 3); /* ...zzzzzz */
-
-	DPRINTK ("EXIT\n");
-	return;
-}
-
-
-/****************************************************************
- *
- * Driver initialization and cleanup
- *
- *
- */
-
-static int __init init_via82cxxx_audio(void)
-{
-	int rc;
-
-	DPRINTK ("ENTER\n");
-
-	rc = pci_register_driver (&via_driver);
-	if (rc) {
-		DPRINTK ("EXIT, returning %d\n", rc);
-		return rc;
-	}
-
-	DPRINTK ("EXIT, returning 0\n");
-	return 0;
-}
-
-
-static void __exit cleanup_via82cxxx_audio(void)
-{
-	DPRINTK ("ENTER\n");
-
-	pci_unregister_driver (&via_driver);
-
-	DPRINTK ("EXIT\n");
-}
-
-
-module_init(init_via82cxxx_audio);
-module_exit(cleanup_via82cxxx_audio);
-
-MODULE_AUTHOR("Jeff Garzik");
-MODULE_DESCRIPTION("DSP audio and mixer driver for Via 82Cxxx audio devices");
-MODULE_LICENSE("GPL");
-
-- 
cgit 


From 941e492bdb1239d2ca8f5736cdfd3ff83d00cb90 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 6 Feb 2008 01:36:42 -0800
Subject: read_current_timer() cleanups

- All implementations can be __devinit

- The function prototypes were in asm/timex.h but they all must be the same,
  so create a single declaration in linux/timex.h.

- uninline the sparc64 version to match the other architectures

- Don't bother #defining ARCH_HAS_READ_CURRENT_TIMER to a particular value.

[ezk@cs.sunysb.edu: fix build]
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/avr32/lib/delay.c      | 4 +++-
 arch/sparc64/kernel/time.c  | 5 +++++
 arch/x86/lib/delay_32.c     | 4 +++-
 arch/x86/lib/delay_64.c     | 4 +++-
 include/asm-avr32/timex.h   | 3 +--
 include/asm-sparc64/timex.h | 6 +-----
 include/asm-x86/timex.h     | 3 +--
 include/linux/timex.h       | 2 ++
 init/calibrate.c            | 3 +--
 9 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c
index b3bc0b56e2c6..9aa8800830f3 100644
--- a/arch/avr32/lib/delay.c
+++ b/arch/avr32/lib/delay.c
@@ -12,13 +12,15 @@
 
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/timex.h>
 #include <linux/param.h>
 #include <linux/types.h>
+#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 
-int read_current_timer(unsigned long *timer_value)
+int __devinit read_current_timer(unsigned long *timer_value)
 {
 	*timer_value = sysreg_read(COUNT);
 	return 0;
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 4352ee4d8dac..d204f1ab1d4c 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -1707,6 +1707,11 @@ static void __exit rtc_mini_exit(void)
 	misc_deregister(&rtc_mini_dev);
 }
 
+int __devinit read_current_timer(unsigned long *timer_val)
+{
+	*timer_val = tick_ops->get_tick();
+	return 0;
+}
 
 module_init(rtc_mini_init);
 module_exit(rtc_mini_exit);
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index aad9d95469dc..4535e6d147ad 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -12,8 +12,10 @@
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/timex.h>
 #include <linux/preempt.h>
 #include <linux/delay.h>
+#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/delay.h>
@@ -63,7 +65,7 @@ void use_tsc_delay(void)
 	delay_fn = delay_tsc;
 }
 
-int read_current_timer(unsigned long *timer_val)
+int __devinit read_current_timer(unsigned long *timer_val)
 {
 	if (delay_fn == delay_tsc) {
 		rdtscl(*timer_val);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index 45cdd3fbd91c..bbc610518516 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -10,8 +10,10 @@
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/timex.h>
 #include <linux/preempt.h>
 #include <linux/delay.h>
+#include <linux/init.h>
 
 #include <asm/delay.h>
 #include <asm/msr.h>
@@ -20,7 +22,7 @@
 #include <asm/smp.h>
 #endif
 
-int read_current_timer(unsigned long *timer_value)
+int __devinit read_current_timer(unsigned long *timer_value)
 {
 	rdtscll(*timer_value);
 	return 0;
diff --git a/include/asm-avr32/timex.h b/include/asm-avr32/timex.h
index 5e44ecb3ce0c..187dcf38b210 100644
--- a/include/asm-avr32/timex.h
+++ b/include/asm-avr32/timex.h
@@ -34,7 +34,6 @@ static inline cycles_t get_cycles (void)
 	return 0;
 }
 
-extern int read_current_timer(unsigned long *timer_value);
-#define ARCH_HAS_READ_CURRENT_TIMER	1
+#define ARCH_HAS_READ_CURRENT_TIMER
 
 #endif /* __ASM_AVR32_TIMEX_H */
diff --git a/include/asm-sparc64/timex.h b/include/asm-sparc64/timex.h
index 2a5e4ebaad80..c622535c4560 100644
--- a/include/asm-sparc64/timex.h
+++ b/include/asm-sparc64/timex.h
@@ -14,10 +14,6 @@
 typedef unsigned long cycles_t;
 #define get_cycles()	tick_ops->get_tick()
 
-#define ARCH_HAS_READ_CURRENT_TIMER	1
-#define read_current_timer(timer_val_p) 	\
-({	*timer_val_p = tick_ops->get_tick();	\
-	0;					\
-})
+#define ARCH_HAS_READ_CURRENT_TIMER
 
 #endif
diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h
index 27cfd6c599ba..43e5a78500c5 100644
--- a/include/asm-x86/timex.h
+++ b/include/asm-x86/timex.h
@@ -14,7 +14,6 @@
 #endif
 #define CLOCK_TICK_RATE	PIT_TICK_RATE
 
-extern int read_current_timer(unsigned long *timer_value);
-#define ARCH_HAS_READ_CURRENT_TIMER	1
+#define ARCH_HAS_READ_CURRENT_TIMER
 
 #endif
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 24c6a2b59511..8ea3e71ba7fa 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -244,6 +244,8 @@ extern int do_adjtimex(struct timex *);
 /* Don't use! Compatibility define for existing users. */
 #define tickadj	(500/HZ ? : 1)
 
+int read_current_timer(unsigned long *timer_val);
+
 #endif /* KERNEL */
 
 #endif /* LINUX_TIMEX_H */
diff --git a/init/calibrate.c b/init/calibrate.c
index 2d3d73bd4ce1..1d87891b6fcd 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -7,8 +7,7 @@
 #include <linux/jiffies.h>
 #include <linux/delay.h>
 #include <linux/init.h>
-
-#include <asm/timex.h>
+#include <linux/timex.h>
 
 unsigned long preset_lpj;
 static int __init lpj_setup(char *str)
-- 
cgit 


From a1c9eea9e56a7196c6891f6426b799c4598b38e2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 6 Feb 2008 01:36:44 -0800
Subject: proper prototype for signals_init()

Add a proper prototype for signals_init() in include/linux/signal.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 ++
 init/main.c            | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 0ae338866240..7e095147656c 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -371,6 +371,8 @@ int unhandled_signal(struct task_struct *tsk, int sig);
 	(!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \
 	 (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL)
 
+void signals_init(void);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SIGNAL_H */
diff --git a/init/main.c b/init/main.c
index cb81ed116f62..c691f5f7fc27 100644
--- a/init/main.c
+++ b/init/main.c
@@ -57,6 +57,7 @@
 #include <linux/device.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
+#include <linux/signal.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -83,7 +84,6 @@ extern void init_IRQ(void);
 extern void fork_init(unsigned long);
 extern void mca_init(void);
 extern void sbus_init(void);
-extern void signals_init(void);
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void prio_tree_init(void);
-- 
cgit 


From 011e3fcd1e1f14ef54db30b93404ab7caa726477 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 6 Feb 2008 01:36:47 -0800
Subject: proper prototype for get_filesystem_list()

Ad a proper prototype for migration_init() in include/linux/fs.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_misc.c | 1 -
 include/linux/fs.h  | 1 +
 init/do_mounts.c    | 3 +--
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c0a90954016f..383ff068ab2e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -65,7 +65,6 @@
  */
 extern int get_hardware_list(char *);
 extern int get_stram_list(char *);
-extern int get_filesystem_list(char *);
 extern int get_exec_domain_list(char *);
 extern int get_dma_list(char *);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 56bd421c1208..ed289a9c5ccb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2113,6 +2113,7 @@ struct ctl_table;
 int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
 
+int get_filesystem_list(char * buf);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 1161dfd7b0d3..f86573126f83 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -11,6 +11,7 @@
 #include <linux/mount.h>
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/fs.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs_sb.h>
@@ -18,8 +19,6 @@
 
 #include "do_mounts.h"
 
-extern int get_filesystem_list(char * buf);
-
 int __initdata rd_doload;	/* 1 = load RAM disk, 0 = don't load */
 
 int root_mountflags = MS_RDONLY | MS_SILENT;
-- 
cgit 


From 26464378c4af9f7461b9d9e359f98dbd34ab3544 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 6 Feb 2008 01:36:49 -0800
Subject: proper prototype for vty_init()

Add a proper prototype for vty_init() in include/linux/vt_kern.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c   | 4 ----
 include/linux/vt_kern.h | 1 +
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index f36fecd3fd26..c927f42c8e4d 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -4048,10 +4048,6 @@ void __init console_init(void)
 	}
 }
 
-#ifdef CONFIG_VT
-extern int vty_init(void);
-#endif
-
 static int __init tty_class_init(void)
 {
 	tty_class = class_create(THIS_MODULE, "tty");
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index feb5e99a1079..9448ffbdcbf6 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -77,6 +77,7 @@ void change_console(struct vc_data *new_vc);
 void reset_vc(struct vc_data *vc);
 extern int unbind_con_driver(const struct consw *csw, int first, int last,
 			     int deflt);
+int vty_init(void);
 
 /*
  * vc_screen.c shares this temporary buffer with the console write code so that
-- 
cgit 


From de9330d13eac1f331e63ab1d18c506365b0151f3 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 6 Feb 2008 01:36:54 -0800
Subject: log2.h: Define order_base_2() macro for convenience.

Given a number of places in the tree that need to calculate this value
explicitly, might as well just create a macro for it.

(akpm: must be implemented as a macro for callee typeof() usage)

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/log2.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/log2.h b/include/linux/log2.h
index c8cf5e8ef171..25b808631cd9 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -190,4 +190,20 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 	__rounddown_pow_of_two(n)		\
  )
 
+/**
+ * order_base_2 - calculate the (rounded up) base 2 order of the argument
+ * @n: parameter
+ *
+ * The first few values calculated by this routine:
+ *  ob2(0) = 0
+ *  ob2(1) = 0
+ *  ob2(2) = 1
+ *  ob2(3) = 2
+ *  ob2(4) = 2
+ *  ob2(5) = 3
+ *  ... and so on.
+ */
+
+#define order_base_2(n) ilog2(roundup_pow_of_two(n))
+
 #endif /* _LINUX_LOG2_H */
-- 
cgit 


From 0321155926b32cbc46f6603c6cc455e046b4d9b2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <olsajiri@gmail.com>
Date: Wed, 6 Feb 2008 01:36:55 -0800
Subject: fs: remove dead config CONFIG_HAS_COMPAT_EPOLL_EVENT symbol

Remove dead config CONFIG_HAS_COMPAT_EPOLL_EVENT symbol.

Signed-off-by: Jiri Olsa <olsajiri@gmail.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat.c            | 49 -------------------------------------------------
 include/linux/compat.h |  8 --------
 2 files changed, 57 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat.c b/fs/compat.c
index 69baca5ad608..ee80ff341d37 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2083,51 +2083,6 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
 
 #ifdef CONFIG_EPOLL
 
-#ifdef CONFIG_HAS_COMPAT_EPOLL_EVENT
-asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd,
-			struct compat_epoll_event __user *event)
-{
-	long err = 0;
-	struct compat_epoll_event user;
-	struct epoll_event __user *kernel = NULL;
-
-	if (event) {
-		if (copy_from_user(&user, event, sizeof(user)))
-			return -EFAULT;
-		kernel = compat_alloc_user_space(sizeof(struct epoll_event));
-		err |= __put_user(user.events, &kernel->events);
-		err |= __put_user(user.data, &kernel->data);
-	}
-
-	return err ? err : sys_epoll_ctl(epfd, op, fd, kernel);
-}
-
-
-asmlinkage long compat_sys_epoll_wait(int epfd,
-			struct compat_epoll_event __user *events,
-			int maxevents, int timeout)
-{
-	long i, ret, err = 0;
-	struct epoll_event __user *kbuf;
-	struct epoll_event ev;
-
-	if ((maxevents <= 0) ||
-			(maxevents > (INT_MAX / sizeof(struct epoll_event))))
-		return -EINVAL;
-	kbuf = compat_alloc_user_space(sizeof(struct epoll_event) * maxevents);
-	ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout);
-	for (i = 0; i < ret; i++) {
-		err |= __get_user(ev.events, &kbuf[i].events);
-		err |= __get_user(ev.data, &kbuf[i].data);
-		err |= __put_user(ev.events, &events->events);
-		err |= __put_user_unaligned(ev.data, &events->data);
-		events++;
-	}
-
-	return err ? -EFAULT: ret;
-}
-#endif	/* CONFIG_HAS_COMPAT_EPOLL_EVENT */
-
 #ifdef TIF_RESTORE_SIGMASK
 asmlinkage long compat_sys_epoll_pwait(int epfd,
 			struct compat_epoll_event __user *events,
@@ -2153,11 +2108,7 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-#ifdef CONFIG_HAS_COMPAT_EPOLL_EVENT
-	err = compat_sys_epoll_wait(epfd, events, maxevents, timeout);
-#else
 	err = sys_epoll_wait(epfd, events, maxevents, timeout);
-#endif
 
 	/*
 	 * If we changed the signal mask, we need to restore the original one.
diff --git a/include/linux/compat.h b/include/linux/compat.h
index ae0a483bef9b..a671dbff7a1f 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -257,16 +257,8 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
 /*
  * epoll (fs/eventpoll.c) compat bits follow ...
  */
-#ifndef CONFIG_HAS_COMPAT_EPOLL_EVENT
 struct epoll_event;
 #define compat_epoll_event	epoll_event
-#else
-asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd,
-			struct compat_epoll_event __user *event);
-asmlinkage long compat_sys_epoll_wait(int epfd,
-			struct compat_epoll_event __user *events,
-			int maxevents, int timeout);
-#endif
 asmlinkage long compat_sys_epoll_pwait(int epfd,
 			struct compat_epoll_event __user *events,
 			int maxevents, int timeout,
-- 
cgit 


From e7ca2d41a029577a8cff453d1445951d4f96bfd8 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Wed, 6 Feb 2008 01:36:59 -0800
Subject: Document I_SYNC and I_DATASYNC

After some archeology (see http://logfs.org/logfs/inode_state_bits) I
finally figured out what the three I_DIRTY bits do.  Maybe others would
prefer less effort to reach this insight.

Signed-off-by: Joern Engel <joern@logfs.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed289a9c5ccb..e260d9a32c21 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1279,8 +1279,10 @@ struct super_operations {
  *
  * Two bits are used for locking and completion notification, I_LOCK and I_SYNC.
  *
- * I_DIRTY_SYNC		Inode itself is dirty.
- * I_DIRTY_DATASYNC	Data-related inode changes pending
+ * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
+ *			fdatasync().  i_atime is the usual cause.
+ * I_DIRTY_DATASYNC	Inode is dirty and must be written on fdatasync(), f.e.
+ *			because i_size changed.
  * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
  * I_NEW		get_new_inode() sets i_state to I_LOCK|I_NEW.  Both
  *			are cleared by unlock_new_inode(), called from iget().
@@ -1312,8 +1314,6 @@ struct super_operations {
  *			purpose reduces latency and prevents some filesystem-
  *			specific deadlocks.
  *
- * Q: Why does I_DIRTY_DATASYNC exist?  It appears as if it could be replaced
- *    by (I_DIRTY_SYNC|I_DIRTY_PAGES).
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on
  *    I_CLEAR?  If not, why?
-- 
cgit 


From b3242151906372f30f57feaa43b4cac96a23edb1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Feb 2008 01:37:01 -0800
Subject: PERCPU : __percpu_alloc_mask() can dynamically size percpu_data
 storage

Instead of allocating a fix sized array of NR_CPUS pointers for percpu_data,
we can use nr_cpu_ids, which is generally < NR_CPUS.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu.h | 2 +-
 mm/allocpercpu.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 50faa0ea28e4..1ac969724bb2 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -54,7 +54,7 @@
 #ifdef CONFIG_SMP
 
 struct percpu_data {
-	void *ptrs[NR_CPUS];
+	void *ptrs[1];
 };
 
 #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index 00b02623f008..7e58322b7134 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(__percpu_populate_mask);
  */
 void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
 {
-	void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
+	void *pdata = kzalloc(nr_cpu_ids * sizeof(void *), gfp);
 	void *__pdata = __percpu_disguise(pdata);
 
 	if (unlikely(!pdata))
-- 
cgit 


From f10db6277dfd6dffb80b2182a256d35adb3134bc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 6 Feb 2008 01:37:05 -0800
Subject: Avoid divide in IS_ALIGN

I was happy to discover the brand new IS_ALIGN macro and quickly used it in
my code.  To my dismay I found that the generated code used division to
perform the test.

This patch fixes it by changing the % test to an &.  This avoids the
division.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ff356b2ee478..18222f267bc4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -35,7 +35,7 @@ extern const char linux_proc_banner[];
 #define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
 #define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
 #define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
-#define IS_ALIGNED(x,a)		(((x) % ((typeof(x))(a))) == 0)
+#define IS_ALIGNED(x, a)		(((x) & ((typeof(x))(a) - 1)) == 0)
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
-- 
cgit 


From 797074e44d78835adbde2ca527718b0e50226b95 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 6 Feb 2008 01:37:08 -0800
Subject: fs: use list_for_each_entry_reverse and kill sb_entry

Use list_for_each_entry_reverse for super_blocks list and remove
unused sb_entry macro.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c  | 7 ++-----
 include/linux/fs.h | 1 -
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0b3064079fa5..db80ce9eb1d0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -515,8 +515,7 @@ writeback_inodes(struct writeback_control *wbc)
 	might_sleep();
 	spin_lock(&sb_lock);
 restart:
-	sb = sb_entry(super_blocks.prev);
-	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
+	list_for_each_entry_reverse(sb, &super_blocks, s_list) {
 		if (sb_has_dirty_inodes(sb)) {
 			/* we're making our own get_super here */
 			sb->s_count++;
@@ -581,10 +580,8 @@ static void set_sb_syncing(int val)
 {
 	struct super_block *sb;
 	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.prev);
-	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
+	list_for_each_entry_reverse(sb, &super_blocks, s_list)
 		sb->s_syncing = val;
-	}
 	spin_unlock(&sb_lock);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e260d9a32c21..19aab50c3b8e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -977,7 +977,6 @@ extern int send_sigurg(struct fown_struct *fown);
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
-#define sb_entry(list)	list_entry((list), struct super_block, s_list)
 #define S_BIAS (1<<30)
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
-- 
cgit 


From ece95912db94d98e202cbedb8f35206deb29d83d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 6 Feb 2008 01:37:13 -0800
Subject: inotify: send IN_ATTRIB events when link count changes

Currently, no notification event has been sent when inode's link count
changed.  This is inconvenient for the application in some cases:

Suppose you have the following directory structure

    foo/test
    bar/

and you watch test.  If someone does "mv foo/test bar/", you get event
IN_MOVE_SELF and you know something has happened with the file "test".
However if someone does "ln foo/test bar/test" and "rm foo/test" you get no
inotify event for the file "test" (only directories "foo" and "bar" receive
events).

Furthermore it could be argued that link count belongs to file's metadata and
thus IN_ATTRIB should be sent when it changes.

The following patch implements sending of IN_ATTRIB inotify events when link
count of the inode changes, i.e., when a hardlink to the inode is created or
when it is removed.  This event is sent in addition to all the events sent so
far.  In particular, when a last link to a file is removed, IN_ATTRIB event is
sent in addition to IN_DELETE_SELF event.

Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Morten Welinder <mwelinder@gmail.com>
Cc: Robert Love <rlove@google.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Cc: Steven French <sfrench@us.ibm.com>
Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c               |  3 ++-
 include/linux/fsnotify.h | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 73e2e665817a..241cff423653 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2188,6 +2188,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 
 	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
 	if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
+		fsnotify_link_count(dentry->d_inode);
 		d_delete(dentry);
 	}
 
@@ -2360,7 +2361,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
 	mutex_unlock(&old_dentry->d_inode->i_mutex);
 	if (!error)
-		fsnotify_create(dir, new_dentry);
+		fsnotify_link(dir, old_dentry->d_inode, new_dentry);
 	return error;
 }
 
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 2bd31fa623b6..d4b7c4ac72e6 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -91,6 +91,14 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 	inotify_inode_is_dead(inode);
 }
 
+/*
+ * fsnotify_link_count - inode's link count changed
+ */
+static inline void fsnotify_link_count(struct inode *inode)
+{
+	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
+}
+
 /*
  * fsnotify_create - 'name' was linked in
  */
@@ -102,6 +110,20 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 }
 
+/*
+ * fsnotify_link - new hardlink in 'inode' directory
+ * Note: We have to pass also the linked inode ptr as some filesystems leave
+ *   new_dentry->d_inode NULL and instantiate inode pointer later
+ */
+static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry)
+{
+	inode_dir_notify(dir, DN_CREATE);
+	inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
+				  inode);
+	fsnotify_link_count(inode);
+	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
+}
+
 /*
  * fsnotify_mkdir - directory 'name' was created
  */
-- 
cgit 


From 9cfe015aa424b3c003baba3841a60dd9b5ad319b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Feb 2008 01:37:16 -0800
Subject: get rid of NR_OPEN and introduce a sysctl_nr_open

NR_OPEN (historically set to 1024*1024) actually forbids processes to open
more than 1024*1024 handles.

Unfortunatly some production servers hit the not so 'ridiculously high
value' of 1024*1024 file descriptors per process.

Changing NR_OPEN is not considered safe because of vmalloc space potential
exhaust.

This patch introduces a new sysctl (/proc/sys/fs/nr_open) wich defaults to
1024*1024, so that admins can decide to change this limit if their workload
needs it.

[akpm@linux-foundation.org: export it for sparc64]
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt  |  8 ++++++++
 Documentation/sysctl/fs.txt         | 10 ++++++++++
 arch/alpha/kernel/osf_sys.c         |  2 +-
 arch/mips/kernel/sysirix.c          |  2 +-
 arch/sparc64/kernel/sparc64_ksyms.c |  1 +
 arch/sparc64/solaris/fs.c           |  2 +-
 arch/sparc64/solaris/timod.c        |  6 ++++--
 fs/file.c                           |  8 +++++---
 include/linux/fs.h                  |  2 +-
 kernel/sys.c                        |  2 +-
 kernel/sysctl.c                     |  8 ++++++++
 11 files changed, 41 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index e2799b5fafea..5681e2fa1496 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1029,6 +1029,14 @@ nr_inodes
 Denotes the  number  of  inodes the system has allocated. This number will
 grow and shrink dynamically.
 
+nr_open
+-------
+
+Denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
 nr_free_inodes
 --------------
 
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index aa986a35e994..f99254327ae5 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
 - inode-max
 - inode-nr
 - inode-state
+- nr_open
 - overflowuid
 - overflowgid
 - suid_dumpable
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.
 
 ==============================================================
 
+nr_open:
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+==============================================================
+
 inode-max, inode-nr & inode-state:
 
 As with file handles, the kernel allocates the inode structures
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 6413c5f23226..72f9a619a66d 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -430,7 +430,7 @@ sys_getpagesize(void)
 asmlinkage unsigned long
 sys_getdtablesize(void)
 {
-	return NR_OPEN;
+	return sysctl_nr_open;
 }
 
 /*
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 4c477c7ff74a..22fd41e946b2 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs)
 			retval = NGROUPS_MAX;
 			goto out;
 		case 5:
-			retval = NR_OPEN;
+			retval = sysctl_nr_open;
 			goto out;
 		case 6:
 			retval = 1;
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 60765e314bd8..8649635d6d74 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid);
 EXPORT_SYMBOL(sys_geteuid);
 EXPORT_SYMBOL(sys_getuid);
 EXPORT_SYMBOL(sys_getegid);
+EXPORT_SYMBOL(sysctl_nr_open);
 EXPORT_SYMBOL(sys_getgid);
 EXPORT_SYMBOL(svr4_getcontext);
 EXPORT_SYMBOL(svr4_setcontext);
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c
index 61be597bf430..9311bfe4f2f7 100644
--- a/arch/sparc64/solaris/fs.c
+++ b/arch/sparc64/solaris/fs.c
@@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val)
 	case 3: /* UL_GMEMLIM */
 		return current->signal->rlim[RLIMIT_DATA].rlim_cur;
 	case 4: /* UL_GDESLIM */
-		return NR_OPEN;
+		return sysctl_nr_open;
 	}
 	return -EINVAL;
 }
diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c
index a9d32ceabf26..f53123c02c2b 100644
--- a/arch/sparc64/solaris/timod.c
+++ b/arch/sparc64/solaris/timod.c
@@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
 
 	SOLD("entry");
 	lock_kernel();
-	if(fd >= NR_OPEN) goto out;
+	if (fd >= sysctl_nr_open)
+		goto out;
 
 	fdt = files_fdtable(current->files);
 	filp = fdt->fd[fd];
@@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
 
 	SOLD("entry");
 	lock_kernel();
-	if(fd >= NR_OPEN) goto out;
+	if (fd >= sysctl_nr_open)
+		goto out;
 
 	fdt = files_fdtable(current->files);
 	filp = fdt->fd[fd];
diff --git a/fs/file.c b/fs/file.c
index c5575de01113..5110acb1c9ef 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -24,6 +24,8 @@ struct fdtable_defer {
 	struct fdtable *next;
 };
 
+int sysctl_nr_open __read_mostly = 1024*1024;
+
 /*
  * We use this list to defer free fdtables that have vmalloced
  * sets/arrays. By keeping a per-cpu list, we avoid having to embed
@@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 	nr /= (1024 / sizeof(struct file *));
 	nr = roundup_pow_of_two(nr + 1);
 	nr *= (1024 / sizeof(struct file *));
-	if (nr > NR_OPEN)
-		nr = NR_OPEN;
+	if (nr > sysctl_nr_open)
+		nr = sysctl_nr_open;
 
 	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
 	if (!fdt)
@@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr)
 	if (nr < fdt->max_fds)
 		return 0;
 	/* Can we expand? */
-	if (nr >= NR_OPEN)
+	if (nr >= sysctl_nr_open)
 		return -EMFILE;
 
 	/* All good, so we try */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 19aab50c3b8e..109734bf6377 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -21,7 +21,7 @@
 
 /* Fixed constants first: */
 #undef NR_OPEN
-#define NR_OPEN (1024*1024)	/* Absolute upper limit on fd num */
+extern int sysctl_nr_open;
 #define INR_OPEN 1024		/* Initial setting for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
diff --git a/kernel/sys.c b/kernel/sys.c
index 53de35fc8245..2b8e2daa9d95 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
+	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
 		return -EPERM;
 
 	retval = security_task_setrlimit(resource, &new_rlim);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5e2ad5bf88e2..86daaa26d120 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1202,6 +1202,14 @@ static struct ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nr_open",
+		.data		= &sysctl_nr_open,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{
 		.ctl_name	= FS_DENTRY,
 		.procname	= "dentry-state",
-- 
cgit 


From d99c4f6b13b3149bc83703ab1493beaeaaaf8a2d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 6 Feb 2008 01:37:25 -0800
Subject: Remove rcu_assign_pointer() penalty for NULL pointers

The rcu_assign_pointer() primitive currently unconditionally executes a
memory barrier, even when a NULL pointer is being assigned.  This has lead
some to avoid using rcu_assign_pointer() for NULL pointers, which loses the
self-documenting advantages of rcu_assign_pointer() This patch uses
__builtin_const_p() to omit needless memory barriers for NULL-pointer
assignments at compile time with no runtime penalty, as discussed in the
following thread:

	http://www.mail-archive.com/netdev@vger.kernel.org/msg54852.html

Tested on x86_64 and ppc64, also compiled the four cases (NULL/non-NULL
and const/non-const) with gcc version 4.1.2, and hand-checked the
assembly output.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rcupdate.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d32c14de270e..37a642c54871 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -174,10 +174,13 @@ struct rcu_head {
  * code.
  */
 
-#define rcu_assign_pointer(p, v)	({ \
-						smp_wmb(); \
-						(p) = (v); \
-					})
+#define rcu_assign_pointer(p, v) \
+	({ \
+		if (!__builtin_constant_p(v) || \
+		    ((v) != NULL)) \
+			smp_wmb(); \
+		(p) = (v); \
+	})
 
 /**
  * synchronize_sched - block until all CPUs have exited any non-preemptive
-- 
cgit 


From eb31005eaf3ca0705b404a78eb92f714c9449276 Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Wed, 6 Feb 2008 01:37:31 -0800
Subject: drivers/char/tty_io.c: remove pty_sem

I couldn't find any users, so removing it..

Signed-off-by: Daniel Walker <dwalker@mvista.com>
Acked-by: Alan Cox <alan@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c | 1 -
 include/linux/tty.h   | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index c927f42c8e4d..b62bb67c3414 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3721,7 +3721,6 @@ static void initialize_tty_struct(struct tty_struct *tty)
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
 	INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc);
-	init_MUTEX(&tty->buf.pty_sem);
 	mutex_init(&tty->termios_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 402de892b3ed..5824a9777ad7 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -74,7 +74,6 @@ struct tty_buffer {
 
 struct tty_bufhead {
 	struct delayed_work work;
-	struct semaphore pty_sem;
 	spinlock_t lock;
 	struct tty_buffer *head;	/* Queue head */
 	struct tty_buffer *tail;	/* Active buffer */
-- 
cgit 


From 4749380ed688884a3bd3328b1bf32529d96aa49b Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Wed, 6 Feb 2008 01:37:32 -0800
Subject: drivers/isdn/i4l/isdn_tty.c: remove write_sem

I couldn't find any users, so removing it..

Signed-off-by: Daniel Walker <dwalker@mvista.com>
Cc: Karsten Keil <kkeil@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/isdn/i4l/isdn_tty.c | 1 -
 include/linux/isdn.h        | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index 9cb6e5021adb..133eb18e65cc 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -1917,7 +1917,6 @@ isdn_tty_modem_init(void)
 		info->owner = THIS_MODULE;
 #endif
 		spin_lock_init(&info->readlock);
-		init_MUTEX(&info->write_sem);
 		sprintf(info->last_cause, "0000");
 		sprintf(info->last_num, "none");
 		info->last_dir = 0;
diff --git a/include/linux/isdn.h b/include/linux/isdn.h
index d0ecc8eebfbf..9cb2855bb170 100644
--- a/include/linux/isdn.h
+++ b/include/linux/isdn.h
@@ -507,7 +507,6 @@ typedef struct modem_info {
   struct ktermios	normal_termios;  /* For saving termios structs     */
   struct ktermios	callout_termios;
   wait_queue_head_t	open_wait, close_wait;
-  struct semaphore      write_sem;
   spinlock_t	        readlock;
 } modem_info;
 
-- 
cgit 


From 1a669c2f16d478cb7f4452e5fb8d09320831f4a1 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 6 Feb 2008 01:37:37 -0800
Subject: Add arch_ptrace_stop

This adds support to allow asm/ptrace.h to define two new macros,
arch_ptrace_stop_needed and arch_ptrace_stop.  These control special
machine-specific actions to be done before a ptrace stop.  The new code
compiles away to nothing when the new macros are not defined.  This is the
case on all machines to begin with.

On ia64, these macros will be defined to solve the long-standing issue of
ptrace vs register backing store.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Matthew Wilcox <willy@debian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 35 +++++++++++++++++++++++++++++++++++
 kernel/signal.c        | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 67 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 515bff053de8..6ab80714a916 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -204,6 +204,41 @@ static inline void user_enable_block_step(struct task_struct *task)
 }
 #endif	/* arch_has_block_step */
 
+#ifndef arch_ptrace_stop_needed
+/**
+ * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
+ * @code:	current->exit_code value ptrace will stop with
+ * @info:	siginfo_t pointer (or %NULL) for signal ptrace will stop with
+ *
+ * This is called with the siglock held, to decide whether or not it's
+ * necessary to release the siglock and call arch_ptrace_stop() with the
+ * same @code and @info arguments.  It can be defined to a constant if
+ * arch_ptrace_stop() is never required, or always is.  On machines where
+ * this makes sense, it should be defined to a quick test to optimize out
+ * calling arch_ptrace_stop() when it would be superfluous.  For example,
+ * if the thread has not been back to user mode since the last stop, the
+ * thread state might indicate that nothing needs to be done.
+ */
+#define arch_ptrace_stop_needed(code, info)	(0)
+#endif
+
+#ifndef arch_ptrace_stop
+/**
+ * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
+ * @code:	current->exit_code value ptrace will stop with
+ * @info:	siginfo_t pointer (or %NULL) for signal ptrace will stop with
+ *
+ * This is called with no locks held when arch_ptrace_stop_needed() has
+ * just returned nonzero.  It is allowed to block, e.g. for user memory
+ * access.  The arch can have machine-specific work to be done before
+ * ptrace stops.  On ia64, register backing store gets written back to user
+ * memory here.  Since this can be costly (requires dropping the siglock),
+ * we only do it when the arch requires it for this particular stop, as
+ * indicated by arch_ptrace_stop_needed().
+ */
+#define arch_ptrace_stop(code, info)		do { } while (0)
+#endif
+
 #endif
 
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index e46971560fcb..5d30ff561847 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1577,6 +1577,17 @@ static inline int may_ptrace_stop(void)
 	return 1;
 }
 
+/*
+ * Return nonzero if there is a SIGKILL that should be waking us up.
+ * Called with the siglock held.
+ */
+static int sigkill_pending(struct task_struct *tsk)
+{
+	return ((sigismember(&tsk->pending.signal, SIGKILL) ||
+		 sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) &&
+		!unlikely(sigismember(&tsk->blocked, SIGKILL)));
+}
+
 /*
  * This must be called with current->sighand->siglock held.
  *
@@ -1590,6 +1601,26 @@ static inline int may_ptrace_stop(void)
  */
 static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
 {
+	int killed = 0;
+
+	if (arch_ptrace_stop_needed(exit_code, info)) {
+		/*
+		 * The arch code has something special to do before a
+		 * ptrace stop.  This is allowed to block, e.g. for faults
+		 * on user stack pages.  We can't keep the siglock while
+		 * calling arch_ptrace_stop, so we must release it now.
+		 * To preserve proper semantics, we must do this before
+		 * any signal bookkeeping like checking group_stop_count.
+		 * Meanwhile, a SIGKILL could come in before we retake the
+		 * siglock.  That must prevent us from sleeping in TASK_TRACED.
+		 * So after regaining the lock, we must check for SIGKILL.
+		 */
+		spin_unlock_irq(&current->sighand->siglock);
+		arch_ptrace_stop(exit_code, info);
+		spin_lock_irq(&current->sighand->siglock);
+		killed = sigkill_pending(current);
+	}
+
 	/*
 	 * If there is a group stop in progress,
 	 * we must participate in the bookkeeping.
@@ -1605,7 +1636,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
 	spin_unlock_irq(&current->sighand->siglock);
 	try_to_freeze();
 	read_lock(&tasklist_lock);
-	if (may_ptrace_stop()) {
+	if (!unlikely(killed) && may_ptrace_stop()) {
 		do_notify_parent_cldstop(current, CLD_TRAPPED);
 		read_unlock(&tasklist_lock);
 		schedule();
-- 
cgit 


From dc999159bbc1c542f310160c56ed8b701a7d6252 Mon Sep 17 00:00:00 2001
From: Luís P Mendes <luis.p.mendes@gmail.com>
Date: Wed, 6 Feb 2008 01:37:43 -0800
Subject: parport: add support for the Quatech SPPXP-100 Parallel port PCI
 ExpressCard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added pci device id for the Quatech SPPXP-100 ExpressCard - 0x278 - to
include/linux/pci_id.h

Modified drivers/parport/parport_pc.c to support the Quatech SPPXP-100 Parallel port PCI ExpressCard

[akpm@linux-foundation.org: build fix]
Signed-off-by: Luís P Mendes <luis.p.mendes@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/parport/parport_pc.c | 5 +++++
 include/linux/pci_ids.h      | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index e9743d3efaf6..db26b4017bc1 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -2767,6 +2767,7 @@ enum parport_pc_pci_cards {
 	netmos_9755,
 	netmos_9805,
 	netmos_9815,
+	quatech_sppxp100,
 };
 
 
@@ -2843,6 +2844,7 @@ static struct parport_pc_pci {
         /* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
 	/* netmos_9805 */               { 1, { { 0, -1 }, } }, /* untested */
 	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */
+	/* quatech_sppxp100 */		{ 1, { { 0, 1 }, } },
 };
 
 static const struct pci_device_id parport_pc_pci_tbl[] = {
@@ -2926,6 +2928,9 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9805 },
 	{ PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9815,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9815 },
+	/* Quatech SPPXP-100 Parallel port PCI ExpressCard */
+	{ PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_SPPXP_100,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
 	{ 0, } /* terminate list */
 };
 MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 39d32837265b..df6dd79a0d3b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1765,6 +1765,7 @@
 #define PCI_DEVICE_ID_QUATECH_DSC100	0x0020
 #define PCI_DEVICE_ID_QUATECH_ESC100D	0x0050
 #define PCI_DEVICE_ID_QUATECH_ESC100M	0x0060
+#define PCI_DEVICE_ID_QUATECH_SPPXP_100 0x0278
 
 #define PCI_VENDOR_ID_SEALEVEL		0x135e
 #define PCI_DEVICE_ID_SEALEVEL_U530	0x7101
-- 
cgit 


From 1bf47346d75790ebd2563d909d48046961c7ffd5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Feb 2008 01:37:56 -0800
Subject: kernel/sys.c: get rid of expensive divides in groups_sort()

groups_sort() can be quite long if user loads a large gid table.

This is because GROUP_AT(group_info, some_integer) uses an integer divide.
So having to do XXX thousand divides during one syscall can lead to very
high latencies.  (NGROUPS_MAX=65536)

In the past (25 Mar 2006), an analog problem was found in groups_search()
(commit d74beb9f33a5f16d2965f11b275e401f225c949d ) and at that time I
changed some variables to unsigned int.

I believe that a more generic fix is to make sure NGROUPS_PER_BLOCK is
unsigned.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 +-
 kernel/sys.c          | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c13be3a21e8..7c8ca05c3cae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,7 +810,7 @@ static inline int above_background_load(void)
 
 struct io_context;			/* See blkdev.h */
 #define NGROUPS_SMALL		32
-#define NGROUPS_PER_BLOCK	((int)(PAGE_SIZE / sizeof(gid_t)))
+#define NGROUPS_PER_BLOCK	((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
 struct group_info {
 	int ngroups;
 	atomic_t usage;
diff --git a/kernel/sys.c b/kernel/sys.c
index 2b8e2daa9d95..e3c08d4324de 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1145,16 +1145,16 @@ static int groups_to_user(gid_t __user *grouplist,
     struct group_info *group_info)
 {
 	int i;
-	int count = group_info->ngroups;
+	unsigned int count = group_info->ngroups;
 
 	for (i = 0; i < group_info->nblocks; i++) {
-		int cp_count = min(NGROUPS_PER_BLOCK, count);
-		int off = i * NGROUPS_PER_BLOCK;
-		int len = cp_count * sizeof(*grouplist);
+		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
+		unsigned int len = cp_count * sizeof(*grouplist);
 
-		if (copy_to_user(grouplist+off, group_info->blocks[i], len))
+		if (copy_to_user(grouplist, group_info->blocks[i], len))
 			return -EFAULT;
 
+		grouplist += NGROUPS_PER_BLOCK;
 		count -= cp_count;
 	}
 	return 0;
@@ -1165,16 +1165,16 @@ static int groups_from_user(struct group_info *group_info,
     gid_t __user *grouplist)
 {
 	int i;
-	int count = group_info->ngroups;
+	unsigned int count = group_info->ngroups;
 
 	for (i = 0; i < group_info->nblocks; i++) {
-		int cp_count = min(NGROUPS_PER_BLOCK, count);
-		int off = i * NGROUPS_PER_BLOCK;
-		int len = cp_count * sizeof(*grouplist);
+		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
+		unsigned int len = cp_count * sizeof(*grouplist);
 
-		if (copy_from_user(group_info->blocks[i], grouplist+off, len))
+		if (copy_from_user(group_info->blocks[i], grouplist, len))
 			return -EFAULT;
 
+		grouplist += NGROUPS_PER_BLOCK;
 		count -= cp_count;
 	}
 	return 0;
-- 
cgit 


From 6ffc787a4492ac627315aaeafdfdc0a5c3028582 Mon Sep 17 00:00:00 2001
From: David Fries <david@fries.net>
Date: Wed, 6 Feb 2008 01:38:04 -0800
Subject: system timer: fix crash in <100Hz system timer

The kernel has a divide by zero crash when trying to run the system timer
less than 100Hz.  The problem is x/(HZ/USER_HZ) and related.  Now
x*(USER_HZ/HZ) will be used if HZ<USER_HZ.

I'm running the Linux kernel under qemu and went to run a slower system
timer to take less CPU (and battery) on the host.  I found that the kernel
paniced under emulation because of a divide by zero in three places.  Here
is the patch.  The base git was updated today 01-05-2008.  I went for a
20Hz system time by adding config HZ_20 etc to kernel/Kconfig.hz.  With
this patch I verified the system timer by looking at /proc/interrupts.

[akpm@linux-foundation.org: partially clean up the macro maze]
Signed-off-by: David Fries <david@fries.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acct.h |  4 ++++
 kernel/time.c        | 11 +++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acct.h b/include/linux/acct.h
index 302eb727ecb8..e8cae54e8d88 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -173,7 +173,11 @@ typedef struct acct acct_t;
 static inline u32 jiffies_to_AHZ(unsigned long x)
 {
 #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0
+# if HZ < AHZ
+	return x * (AHZ / HZ);
+# else
 	return x / (HZ / AHZ);
+# endif
 #else
         u64 tmp = (u64)x * TICK_NSEC;
         do_div(tmp, (NSEC_PER_SEC / AHZ));
diff --git a/kernel/time.c b/kernel/time.c
index 4064c0566e77..be5c8cb93582 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -566,7 +566,11 @@ EXPORT_SYMBOL(jiffies_to_timeval);
 clock_t jiffies_to_clock_t(long x)
 {
 #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
+# if HZ < USER_HZ
+	return x * (USER_HZ / HZ);
+# else
 	return x / (HZ / USER_HZ);
+# endif
 #else
 	u64 tmp = (u64)x * TICK_NSEC;
 	do_div(tmp, (NSEC_PER_SEC / USER_HZ));
@@ -599,7 +603,12 @@ EXPORT_SYMBOL(clock_t_to_jiffies);
 u64 jiffies_64_to_clock_t(u64 x)
 {
 #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
+# if HZ < USER_HZ
+	x *= USER_HZ;
+	do_div(x, HZ);
+# else
 	do_div(x, HZ / USER_HZ);
+# endif
 #else
 	/*
 	 * There are better ways that don't overflow early,
@@ -611,7 +620,6 @@ u64 jiffies_64_to_clock_t(u64 x)
 #endif
 	return x;
 }
-
 EXPORT_SYMBOL(jiffies_64_to_clock_t);
 
 u64 nsec_to_clock_t(u64 x)
@@ -646,7 +654,6 @@ u64 get_jiffies_64(void)
 	} while (read_seqretry(&xtime_lock, seq));
 	return ret;
 }
-
 EXPORT_SYMBOL(get_jiffies_64);
 #endif
 
-- 
cgit 


From f47cd9b553aaada602449204513b5a5b29cba263 Mon Sep 17 00:00:00 2001
From: Abhishek Sagar <sagar.abhishek@gmail.com>
Date: Wed, 6 Feb 2008 01:38:22 -0800
Subject: kprobes: kretprobe user entry-handler

Provide support to add an optional user defined callback to be run at
function entry of a kretprobe'd function.  Also modify the kprobe smoke
tests to include an entry-handler during the kretprobe sanity test.

Signed-off-by: Abhishek Sagar <sagar.abhishek@gmail.com>
Cc: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Acked-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kprobes.txt | 81 ++++++++++++++++++++++++++++++++++++++++-------
 include/linux/kprobes.h   |  3 ++
 kernel/kprobes.c          |  9 +++++-
 kernel/test_kprobes.c     | 16 ++++++++--
 4 files changed, 94 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 53a63890aea4..30c101761d0d 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -96,7 +96,9 @@ or in registers (e.g., for x86_64 or for an i386 fastcall function).
 The jprobe will work in either case, so long as the handler's
 prototype matches that of the probed function.
 
-1.3 How Does a Return Probe Work?
+1.3 Return Probes
+
+1.3.1 How Does a Return Probe Work?
 
 When you call register_kretprobe(), Kprobes establishes a kprobe at
 the entry to the function.  When the probed function is called and this
@@ -107,9 +109,9 @@ At boot time, Kprobes registers a kprobe at the trampoline.
 
 When the probed function executes its return instruction, control
 passes to the trampoline and that probe is hit.  Kprobes' trampoline
-handler calls the user-specified handler associated with the kretprobe,
-then sets the saved instruction pointer to the saved return address,
-and that's where execution resumes upon return from the trap.
+handler calls the user-specified return handler associated with the
+kretprobe, then sets the saved instruction pointer to the saved return
+address, and that's where execution resumes upon return from the trap.
 
 While the probed function is executing, its return address is
 stored in an object of type kretprobe_instance.  Before calling
@@ -131,6 +133,30 @@ zero when the return probe is registered, and is incremented every
 time the probed function is entered but there is no kretprobe_instance
 object available for establishing the return probe.
 
+1.3.2 Kretprobe entry-handler
+
+Kretprobes also provides an optional user-specified handler which runs
+on function entry. This handler is specified by setting the entry_handler
+field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the
+function entry is hit, the user-defined entry_handler, if any, is invoked.
+If the entry_handler returns 0 (success) then a corresponding return handler
+is guaranteed to be called upon function return. If the entry_handler
+returns a non-zero error then Kprobes leaves the return address as is, and
+the kretprobe has no further effect for that particular function instance.
+
+Multiple entry and return handler invocations are matched using the unique
+kretprobe_instance object associated with them. Additionally, a user
+may also specify per return-instance private data to be part of each
+kretprobe_instance object. This is especially useful when sharing private
+data between corresponding user entry and return handlers. The size of each
+private data object can be specified at kretprobe registration time by
+setting the data_size field of the kretprobe struct. This data can be
+accessed through the data field of each kretprobe_instance object.
+
+In case probed function is entered but there is no kretprobe_instance
+object available, then in addition to incrementing the nmissed count,
+the user entry_handler invocation is also skipped.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
@@ -274,6 +300,8 @@ of interest:
 - ret_addr: the return address
 - rp: points to the corresponding kretprobe object
 - task: points to the corresponding task struct
+- data: points to per return-instance private data; see "Kretprobe
+	entry-handler" for details.
 
 The regs_return_value(regs) macro provides a simple abstraction to
 extract the return value from the appropriate register as defined by
@@ -556,23 +584,52 @@ report failed calls to sys_open().
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/ktime.h>
+
+/* per-instance private data */
+struct my_data {
+	ktime_t entry_stamp;
+};
 
 static const char *probed_func = "sys_open";
 
-/* Return-probe handler: If the probed function fails, log the return value. */
-static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+/* Timestamp function entry. */
+static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	struct my_data *data;
+
+	if(!current->mm)
+		return 1; /* skip kernel threads */
+
+	data = (struct my_data *)ri->data;
+	data->entry_stamp = ktime_get();
+	return 0;
+}
+
+/* If the probed function failed, log the return value and duration.
+ * Duration may turn out to be zero consistently, depending upon the
+ * granularity of time accounting on the platform. */
+static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	int retval = regs_return_value(regs);
+	struct my_data *data = (struct my_data *)ri->data;
+	s64 delta;
+	ktime_t now;
+
 	if (retval < 0) {
-		printk("%s returns %d\n", probed_func, retval);
+		now = ktime_get();
+		delta = ktime_to_ns(ktime_sub(now, data->entry_stamp));
+		printk("%s: return val = %d (duration = %lld ns)\n",
+		       probed_func, retval, delta);
 	}
 	return 0;
 }
 
 static struct kretprobe my_kretprobe = {
-	.handler = ret_handler,
-	/* Probe up to 20 instances concurrently. */
-	.maxactive = 20
+	.handler = return_handler,
+	.entry_handler = entry_handler,
+	.data_size = sizeof(struct my_data),
+	.maxactive = 20, /* probe up to 20 instances concurrently */
 };
 
 static int __init kretprobe_init(void)
@@ -584,7 +641,7 @@ static int __init kretprobe_init(void)
 		printk("register_kretprobe failed, returned %d\n", ret);
 		return -1;
 	}
-	printk("Planted return probe at %p\n", my_kretprobe.kp.addr);
+	printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name);
 	return 0;
 }
 
@@ -594,7 +651,7 @@ static void __exit kretprobe_exit(void)
 	printk("kretprobe unregistered\n");
 	/* nmissed > 0 suggests that maxactive was set too low. */
 	printk("Missed probing %d instances of %s\n",
-		my_kretprobe.nmissed, probed_func);
+	       my_kretprobe.nmissed, probed_func);
 }
 
 module_init(kretprobe_init)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 6168c0a44172..4a6ce82ba039 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -152,8 +152,10 @@ static inline int arch_trampoline_kprobe(struct kprobe *p)
 struct kretprobe {
 	struct kprobe kp;
 	kretprobe_handler_t handler;
+	kretprobe_handler_t entry_handler;
 	int maxactive;
 	int nmissed;
+	size_t data_size;
 	struct hlist_head free_instances;
 	struct hlist_head used_instances;
 };
@@ -164,6 +166,7 @@ struct kretprobe_instance {
 	struct kretprobe *rp;
 	kprobe_opcode_t *ret_addr;
 	struct task_struct *task;
+	char data[0];
 };
 
 struct kretprobe_blackpoint {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d0493eafea3e..7a86e6432338 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -699,6 +699,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 				 struct kretprobe_instance, uflist);
 		ri->rp = rp;
 		ri->task = current;
+
+		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+			spin_unlock_irqrestore(&kretprobe_lock, flags);
+			return 0;
+		}
+
 		arch_prepare_kretprobe(ri, regs);
 
 		/* XXX(hch): why is there no hlist_move_head? */
@@ -745,7 +751,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	INIT_HLIST_HEAD(&rp->used_instances);
 	INIT_HLIST_HEAD(&rp->free_instances);
 	for (i = 0; i < rp->maxactive; i++) {
-		inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL);
+		inst = kmalloc(sizeof(struct kretprobe_instance) +
+			       rp->data_size, GFP_KERNEL);
 		if (inst == NULL) {
 			free_rp_inst(rp);
 			return -ENOMEM;
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 88cdb109e13c..06b6395b45b2 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -135,6 +135,12 @@ static int test_jprobe(void)
 #ifdef CONFIG_KRETPROBES
 static u32 krph_val;
 
+static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	krph_val = (rand1 / div_factor);
+	return 0;
+}
+
 static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	unsigned long ret = regs_return_value(regs);
@@ -144,13 +150,19 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
 		printk(KERN_ERR "Kprobe smoke test failed: "
 				"incorrect value in kretprobe handler\n");
 	}
+	if (krph_val == 0) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"call to kretprobe entry handler failed\n");
+	}
 
-	krph_val = (rand1 / div_factor);
+	krph_val = rand1;
 	return 0;
 }
 
 static struct kretprobe rp = {
 	.handler	= return_handler,
+	.entry_handler  = entry_handler,
 	.kp.symbol_name = "kprobe_target"
 };
 
@@ -167,7 +179,7 @@ static int test_kretprobe(void)
 
 	ret = kprobe_target(rand1);
 	unregister_kretprobe(&rp);
-	if (krph_val == 0) {
+	if (krph_val != rand1) {
 		printk(KERN_ERR "Kprobe smoke test failed: "
 				"kretprobe handler not called\n");
 		handler_errors++;
-- 
cgit 


From ad8dc96e3b2c3e28854e0de4ab49351ed547b30c Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Wed, 6 Feb 2008 01:39:01 -0800
Subject: w1-gpio: add GPIO w1 bus master driver

Add a GPIO 1-wire bus master driver.  The driver used the GPIO API to
control the wire and the GPIO pin can be specified using platform data
similar to i2c-gpio.  The driver was tested with AT91SAM9260 + DS2401.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/w1/masters/00-INDEX |   2 +
 Documentation/w1/masters/w1-gpio  |  33 ++++++++++
 drivers/w1/masters/Kconfig        |  10 +++
 drivers/w1/masters/Makefile       |   1 +
 drivers/w1/masters/w1-gpio.c      | 124 ++++++++++++++++++++++++++++++++++++++
 include/linux/w1-gpio.h           |  23 +++++++
 6 files changed, 193 insertions(+)
 create mode 100644 Documentation/w1/masters/w1-gpio
 create mode 100644 drivers/w1/masters/w1-gpio.c
 create mode 100644 include/linux/w1-gpio.h

(limited to 'include/linux')

diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX
index 752613c4cea2..7b0ceaaad7af 100644
--- a/Documentation/w1/masters/00-INDEX
+++ b/Documentation/w1/masters/00-INDEX
@@ -4,3 +4,5 @@ ds2482
 	- The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses.
 ds2490
 	- The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges.
+w1-gpio
+	- GPIO 1-wire bus master driver.
diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio
new file mode 100644
index 000000000000..af5d3b4aa851
--- /dev/null
+++ b/Documentation/w1/masters/w1-gpio
@@ -0,0 +1,33 @@
+Kernel driver w1-gpio
+=====================
+
+Author: Ville Syrjala <syrjala@sci.fi>
+
+
+Description
+-----------
+
+GPIO 1-wire bus master driver. The driver uses the GPIO API to control the
+wire and the GPIO pin can be specified using platform data.
+
+
+Example (mach-at91)
+-------------------
+
+#include <linux/w1-gpio.h>
+
+static struct w1_gpio_platform_data foo_w1_gpio_pdata = {
+	.pin		= AT91_PIN_PB20,
+	.is_open_drain	= 1,
+};
+
+static struct platform_device foo_w1_device = {
+	.name			= "w1-gpio",
+	.id			= -1,
+	.dev.platform_data	= &foo_w1_gpio_pdata,
+};
+
+...
+	at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1);
+	at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1);
+	platform_device_register(&foo_w1_device);
diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig
index 8236d447adf5..c4493091c655 100644
--- a/drivers/w1/masters/Kconfig
+++ b/drivers/w1/masters/Kconfig
@@ -42,5 +42,15 @@ config W1_MASTER_DS1WM
 	  in HP iPAQ devices like h5xxx, h2200, and ASIC3-based like
 	  hx4700.
 
+config W1_MASTER_GPIO
+	tristate "GPIO 1-wire busmaster"
+	depends on GENERIC_GPIO
+	help
+	  Say Y here if you want to communicate with your 1-wire devices using
+	  GPIO pins. This driver uses the GPIO API to control the wire.
+
+	  This support is also available as a module.  If so, the module
+	  will be called w1-gpio.ko.
+
 endmenu
 
diff --git a/drivers/w1/masters/Makefile b/drivers/w1/masters/Makefile
index 11551b328186..1420b5bbdda8 100644
--- a/drivers/w1/masters/Makefile
+++ b/drivers/w1/masters/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_W1_MASTER_MATROX)		+= matrox_w1.o
 obj-$(CONFIG_W1_MASTER_DS2490)		+= ds2490.o
 obj-$(CONFIG_W1_MASTER_DS2482)		+= ds2482.o
 obj-$(CONFIG_W1_MASTER_DS1WM)		+= ds1wm.o
+obj-$(CONFIG_W1_MASTER_GPIO)		+= w1-gpio.o
diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c
new file mode 100644
index 000000000000..9e1138a75e8b
--- /dev/null
+++ b/drivers/w1/masters/w1-gpio.c
@@ -0,0 +1,124 @@
+/*
+ * w1-gpio - GPIO w1 bus master driver
+ *
+ * Copyright (C) 2007 Ville Syrjala <syrjala@sci.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/w1-gpio.h>
+
+#include "../w1.h"
+#include "../w1_int.h"
+
+#include <asm/gpio.h>
+
+static void w1_gpio_write_bit_dir(void *data, u8 bit)
+{
+	struct w1_gpio_platform_data *pdata = data;
+
+	if (bit)
+		gpio_direction_input(pdata->pin);
+	else
+		gpio_direction_output(pdata->pin, 0);
+}
+
+static void w1_gpio_write_bit_val(void *data, u8 bit)
+{
+	struct w1_gpio_platform_data *pdata = data;
+
+	gpio_set_value(pdata->pin, bit);
+}
+
+static u8 w1_gpio_read_bit(void *data)
+{
+	struct w1_gpio_platform_data *pdata = data;
+
+	return gpio_get_value(pdata->pin);
+}
+
+static int __init w1_gpio_probe(struct platform_device *pdev)
+{
+	struct w1_bus_master *master;
+	struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
+	int err;
+
+	if (!pdata)
+		return -ENXIO;
+
+	master = kzalloc(sizeof(struct w1_bus_master), GFP_KERNEL);
+	if (!master)
+		return -ENOMEM;
+
+	err = gpio_request(pdata->pin, "w1");
+	if (err)
+		goto free_master;
+
+	master->data = pdata;
+	master->read_bit = w1_gpio_read_bit;
+
+	if (pdata->is_open_drain) {
+		gpio_direction_output(pdata->pin, 1);
+		master->write_bit = w1_gpio_write_bit_val;
+	} else {
+		gpio_direction_input(pdata->pin);
+		master->write_bit = w1_gpio_write_bit_dir;
+	}
+
+	err = w1_add_master_device(master);
+	if (err)
+		goto free_gpio;
+
+	platform_set_drvdata(pdev, master);
+
+	return 0;
+
+ free_gpio:
+	gpio_free(pdata->pin);
+ free_master:
+	kfree(master);
+
+	return err;
+}
+
+static int __exit w1_gpio_remove(struct platform_device *pdev)
+{
+	struct w1_bus_master *master = platform_get_drvdata(pdev);
+	struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+	w1_remove_master_device(master);
+	gpio_free(pdata->pin);
+	kfree(master);
+
+	return 0;
+}
+
+static struct platform_driver w1_gpio_driver = {
+	.driver = {
+		.name	= "w1-gpio",
+		.owner	= THIS_MODULE,
+	},
+	.remove	= __exit_p(w1_gpio_remove),
+};
+
+static int __init w1_gpio_init(void)
+{
+	return platform_driver_probe(&w1_gpio_driver, w1_gpio_probe);
+}
+
+static void __exit w1_gpio_exit(void)
+{
+	platform_driver_unregister(&w1_gpio_driver);
+}
+
+module_init(w1_gpio_init);
+module_exit(w1_gpio_exit);
+
+MODULE_DESCRIPTION("GPIO w1 bus master driver");
+MODULE_AUTHOR("Ville Syrjala <syrjala@sci.fi>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/w1-gpio.h b/include/linux/w1-gpio.h
new file mode 100644
index 000000000000..9797fec7748a
--- /dev/null
+++ b/include/linux/w1-gpio.h
@@ -0,0 +1,23 @@
+/*
+ * w1-gpio interface to platform code
+ *
+ * Copyright (C) 2007 Ville Syrjala <syrjala@sci.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_W1_GPIO_H
+#define _LINUX_W1_GPIO_H
+
+/**
+ * struct w1_gpio_platform_data - Platform-dependent data for w1-gpio
+ * @pin: GPIO pin to use
+ * @is_open_drain: GPIO pin is configured as open drain
+ */
+struct w1_gpio_platform_data {
+	unsigned int pin;
+	unsigned int is_open_drain:1;
+};
+
+#endif /* _LINUX_W1_GPIO_H */
-- 
cgit 


From d1c057e31734426ba385e02291d97bdf06ba0c1d Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@pengutronix.de>
Date: Wed, 6 Feb 2008 01:39:02 -0800
Subject: gpio: rename pca9539 driver

First part of an extension to let the pca9539 driver support more chips,
starting with pca9534, pca9535, pca9536, pca9537, and pca9538.

This renames the files and modifies the Makefile.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@pengutronix.de>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Makefile       |   2 +-
 drivers/gpio/pca9539.c      | 271 --------------------------------------------
 drivers/gpio/pca953x.c      | 271 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/pca9539.h |  18 ---
 include/linux/i2c/pca953x.h |  18 +++
 5 files changed, 290 insertions(+), 290 deletions(-)
 delete mode 100644 drivers/gpio/pca9539.c
 create mode 100644 drivers/gpio/pca953x.c
 delete mode 100644 include/linux/i2c/pca9539.h
 create mode 100644 include/linux/i2c/pca953x.h

(limited to 'include/linux')

diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 470ecd6aa778..16dda772b44b 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -5,5 +5,5 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 obj-$(CONFIG_HAVE_GPIO_LIB)	+= gpiolib.o
 
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
-obj-$(CONFIG_GPIO_PCA9539)	+= pca9539.o
+obj-$(CONFIG_GPIO_PCA9539)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
diff --git a/drivers/gpio/pca9539.c b/drivers/gpio/pca9539.c
deleted file mode 100644
index 3e85c92a7d59..000000000000
--- a/drivers/gpio/pca9539.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- *  pca9539.c - 16-bit I/O port with interrupt and reset
- *
- *  Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
- *  Copyright (C) 2007 Marvell International Ltd.
- *
- *  Derived from drivers/i2c/chips/pca9539.c
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/i2c.h>
-#include <linux/i2c/pca9539.h>
-
-#include <asm/gpio.h>
-
-
-#define NR_PCA9539_GPIOS	16
-
-#define PCA9539_INPUT		0
-#define PCA9539_OUTPUT		2
-#define PCA9539_INVERT		4
-#define PCA9539_DIRECTION	6
-
-struct pca9539_chip {
-	unsigned gpio_start;
-	uint16_t reg_output;
-	uint16_t reg_direction;
-
-	struct i2c_client *client;
-	struct gpio_chip gpio_chip;
-};
-
-/* NOTE:  we can't currently rely on fault codes to come from SMBus
- * calls, so we map all errors to EIO here and return zero otherwise.
- */
-static int pca9539_write_reg(struct pca9539_chip *chip, int reg, uint16_t val)
-{
-	if (i2c_smbus_write_word_data(chip->client, reg, val) < 0)
-		return -EIO;
-	else
-		return 0;
-}
-
-static int pca9539_read_reg(struct pca9539_chip *chip, int reg, uint16_t *val)
-{
-	int ret;
-
-	ret = i2c_smbus_read_word_data(chip->client, reg);
-	if (ret < 0) {
-		dev_err(&chip->client->dev, "failed reading register\n");
-		return -EIO;
-	}
-
-	*val = (uint16_t)ret;
-	return 0;
-}
-
-static int pca9539_gpio_direction_input(struct gpio_chip *gc, unsigned off)
-{
-	struct pca9539_chip *chip;
-	uint16_t reg_val;
-	int ret;
-
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
-
-	reg_val = chip->reg_direction | (1u << off);
-	ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val);
-	if (ret)
-		return ret;
-
-	chip->reg_direction = reg_val;
-	return 0;
-}
-
-static int pca9539_gpio_direction_output(struct gpio_chip *gc,
-		unsigned off, int val)
-{
-	struct pca9539_chip *chip;
-	uint16_t reg_val;
-	int ret;
-
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
-
-	/* set output level */
-	if (val)
-		reg_val = chip->reg_output | (1u << off);
-	else
-		reg_val = chip->reg_output & ~(1u << off);
-
-	ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val);
-	if (ret)
-		return ret;
-
-	chip->reg_output = reg_val;
-
-	/* then direction */
-	reg_val = chip->reg_direction & ~(1u << off);
-	ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val);
-	if (ret)
-		return ret;
-
-	chip->reg_direction = reg_val;
-	return 0;
-}
-
-static int pca9539_gpio_get_value(struct gpio_chip *gc, unsigned off)
-{
-	struct pca9539_chip *chip;
-	uint16_t reg_val;
-	int ret;
-
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
-
-	ret = pca9539_read_reg(chip, PCA9539_INPUT, &reg_val);
-	if (ret < 0) {
-		/* NOTE:  diagnostic already emitted; that's all we should
-		 * do unless gpio_*_value_cansleep() calls become different
-		 * from their nonsleeping siblings (and report faults).
-		 */
-		return 0;
-	}
-
-	return (reg_val & (1u << off)) ? 1 : 0;
-}
-
-static void pca9539_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
-{
-	struct pca9539_chip *chip;
-	uint16_t reg_val;
-	int ret;
-
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
-
-	if (val)
-		reg_val = chip->reg_output | (1u << off);
-	else
-		reg_val = chip->reg_output & ~(1u << off);
-
-	ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val);
-	if (ret)
-		return;
-
-	chip->reg_output = reg_val;
-}
-
-static int pca9539_init_gpio(struct pca9539_chip *chip)
-{
-	struct gpio_chip *gc;
-
-	gc = &chip->gpio_chip;
-
-	gc->direction_input  = pca9539_gpio_direction_input;
-	gc->direction_output = pca9539_gpio_direction_output;
-	gc->get = pca9539_gpio_get_value;
-	gc->set = pca9539_gpio_set_value;
-
-	gc->base = chip->gpio_start;
-	gc->ngpio = NR_PCA9539_GPIOS;
-	gc->label = "pca9539";
-
-	return gpiochip_add(gc);
-}
-
-static int __devinit pca9539_probe(struct i2c_client *client)
-{
-	struct pca9539_platform_data *pdata;
-	struct pca9539_chip *chip;
-	int ret;
-
-	pdata = client->dev.platform_data;
-	if (pdata == NULL)
-		return -ENODEV;
-
-	chip = kzalloc(sizeof(struct pca9539_chip), GFP_KERNEL);
-	if (chip == NULL)
-		return -ENOMEM;
-
-	chip->client = client;
-
-	chip->gpio_start = pdata->gpio_base;
-
-	/* initialize cached registers from their original values.
-	 * we can't share this chip with another i2c master.
-	 */
-	ret = pca9539_read_reg(chip, PCA9539_OUTPUT, &chip->reg_output);
-	if (ret)
-		goto out_failed;
-
-	ret = pca9539_read_reg(chip, PCA9539_DIRECTION, &chip->reg_direction);
-	if (ret)
-		goto out_failed;
-
-	/* set platform specific polarity inversion */
-	ret = pca9539_write_reg(chip, PCA9539_INVERT, pdata->invert);
-	if (ret)
-		goto out_failed;
-
-	ret = pca9539_init_gpio(chip);
-	if (ret)
-		goto out_failed;
-
-	if (pdata->setup) {
-		ret = pdata->setup(client, chip->gpio_chip.base,
-				chip->gpio_chip.ngpio, pdata->context);
-		if (ret < 0)
-			dev_warn(&client->dev, "setup failed, %d\n", ret);
-	}
-
-	i2c_set_clientdata(client, chip);
-	return 0;
-
-out_failed:
-	kfree(chip);
-	return ret;
-}
-
-static int pca9539_remove(struct i2c_client *client)
-{
-	struct pca9539_platform_data *pdata = client->dev.platform_data;
-	struct pca9539_chip *chip = i2c_get_clientdata(client);
-	int ret = 0;
-
-	if (pdata->teardown) {
-		ret = pdata->teardown(client, chip->gpio_chip.base,
-				chip->gpio_chip.ngpio, pdata->context);
-		if (ret < 0) {
-			dev_err(&client->dev, "%s failed, %d\n",
-					"teardown", ret);
-			return ret;
-		}
-	}
-
-	ret = gpiochip_remove(&chip->gpio_chip);
-	if (ret) {
-		dev_err(&client->dev, "%s failed, %d\n",
-				"gpiochip_remove()", ret);
-		return ret;
-	}
-
-	kfree(chip);
-	return 0;
-}
-
-static struct i2c_driver pca9539_driver = {
-	.driver = {
-		.name	= "pca9539",
-	},
-	.probe		= pca9539_probe,
-	.remove		= pca9539_remove,
-};
-
-static int __init pca9539_init(void)
-{
-	return i2c_add_driver(&pca9539_driver);
-}
-module_init(pca9539_init);
-
-static void __exit pca9539_exit(void)
-{
-	i2c_del_driver(&pca9539_driver);
-}
-module_exit(pca9539_exit);
-
-MODULE_AUTHOR("eric miao <eric.miao@marvell.com>");
-MODULE_DESCRIPTION("GPIO expander driver for PCA9539");
-MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
new file mode 100644
index 000000000000..7fae4e500dfd
--- /dev/null
+++ b/drivers/gpio/pca953x.c
@@ -0,0 +1,271 @@
+/*
+ *  pca9539.c - 16-bit I/O port with interrupt and reset
+ *
+ *  Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
+ *  Copyright (C) 2007 Marvell International Ltd.
+ *
+ *  Derived from drivers/i2c/chips/pca9539.c
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca953x.h>
+
+#include <asm/gpio.h>
+
+
+#define NR_PCA9539_GPIOS	16
+
+#define PCA9539_INPUT		0
+#define PCA9539_OUTPUT		2
+#define PCA9539_INVERT		4
+#define PCA9539_DIRECTION	6
+
+struct pca9539_chip {
+	unsigned gpio_start;
+	uint16_t reg_output;
+	uint16_t reg_direction;
+
+	struct i2c_client *client;
+	struct gpio_chip gpio_chip;
+};
+
+/* NOTE:  we can't currently rely on fault codes to come from SMBus
+ * calls, so we map all errors to EIO here and return zero otherwise.
+ */
+static int pca9539_write_reg(struct pca9539_chip *chip, int reg, uint16_t val)
+{
+	if (i2c_smbus_write_word_data(chip->client, reg, val) < 0)
+		return -EIO;
+	else
+		return 0;
+}
+
+static int pca9539_read_reg(struct pca9539_chip *chip, int reg, uint16_t *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_word_data(chip->client, reg);
+	if (ret < 0) {
+		dev_err(&chip->client->dev, "failed reading register\n");
+		return -EIO;
+	}
+
+	*val = (uint16_t)ret;
+	return 0;
+}
+
+static int pca9539_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+{
+	struct pca9539_chip *chip;
+	uint16_t reg_val;
+	int ret;
+
+	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+
+	reg_val = chip->reg_direction | (1u << off);
+	ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val);
+	if (ret)
+		return ret;
+
+	chip->reg_direction = reg_val;
+	return 0;
+}
+
+static int pca9539_gpio_direction_output(struct gpio_chip *gc,
+		unsigned off, int val)
+{
+	struct pca9539_chip *chip;
+	uint16_t reg_val;
+	int ret;
+
+	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+
+	/* set output level */
+	if (val)
+		reg_val = chip->reg_output | (1u << off);
+	else
+		reg_val = chip->reg_output & ~(1u << off);
+
+	ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val);
+	if (ret)
+		return ret;
+
+	chip->reg_output = reg_val;
+
+	/* then direction */
+	reg_val = chip->reg_direction & ~(1u << off);
+	ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val);
+	if (ret)
+		return ret;
+
+	chip->reg_direction = reg_val;
+	return 0;
+}
+
+static int pca9539_gpio_get_value(struct gpio_chip *gc, unsigned off)
+{
+	struct pca9539_chip *chip;
+	uint16_t reg_val;
+	int ret;
+
+	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+
+	ret = pca9539_read_reg(chip, PCA9539_INPUT, &reg_val);
+	if (ret < 0) {
+		/* NOTE:  diagnostic already emitted; that's all we should
+		 * do unless gpio_*_value_cansleep() calls become different
+		 * from their nonsleeping siblings (and report faults).
+		 */
+		return 0;
+	}
+
+	return (reg_val & (1u << off)) ? 1 : 0;
+}
+
+static void pca9539_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+{
+	struct pca9539_chip *chip;
+	uint16_t reg_val;
+	int ret;
+
+	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+
+	if (val)
+		reg_val = chip->reg_output | (1u << off);
+	else
+		reg_val = chip->reg_output & ~(1u << off);
+
+	ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val);
+	if (ret)
+		return;
+
+	chip->reg_output = reg_val;
+}
+
+static int pca9539_init_gpio(struct pca9539_chip *chip)
+{
+	struct gpio_chip *gc;
+
+	gc = &chip->gpio_chip;
+
+	gc->direction_input  = pca9539_gpio_direction_input;
+	gc->direction_output = pca9539_gpio_direction_output;
+	gc->get = pca9539_gpio_get_value;
+	gc->set = pca9539_gpio_set_value;
+
+	gc->base = chip->gpio_start;
+	gc->ngpio = NR_PCA9539_GPIOS;
+	gc->label = "pca9539";
+
+	return gpiochip_add(gc);
+}
+
+static int __devinit pca9539_probe(struct i2c_client *client)
+{
+	struct pca9539_platform_data *pdata;
+	struct pca9539_chip *chip;
+	int ret;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL)
+		return -ENODEV;
+
+	chip = kzalloc(sizeof(struct pca9539_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	chip->client = client;
+
+	chip->gpio_start = pdata->gpio_base;
+
+	/* initialize cached registers from their original values.
+	 * we can't share this chip with another i2c master.
+	 */
+	ret = pca9539_read_reg(chip, PCA9539_OUTPUT, &chip->reg_output);
+	if (ret)
+		goto out_failed;
+
+	ret = pca9539_read_reg(chip, PCA9539_DIRECTION, &chip->reg_direction);
+	if (ret)
+		goto out_failed;
+
+	/* set platform specific polarity inversion */
+	ret = pca9539_write_reg(chip, PCA9539_INVERT, pdata->invert);
+	if (ret)
+		goto out_failed;
+
+	ret = pca9539_init_gpio(chip);
+	if (ret)
+		goto out_failed;
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0)
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, chip);
+	return 0;
+
+out_failed:
+	kfree(chip);
+	return ret;
+}
+
+static int pca9539_remove(struct i2c_client *client)
+{
+	struct pca9539_platform_data *pdata = client->dev.platform_data;
+	struct pca9539_chip *chip = i2c_get_clientdata(client);
+	int ret = 0;
+
+	if (pdata->teardown) {
+		ret = pdata->teardown(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0) {
+			dev_err(&client->dev, "%s failed, %d\n",
+					"teardown", ret);
+			return ret;
+		}
+	}
+
+	ret = gpiochip_remove(&chip->gpio_chip);
+	if (ret) {
+		dev_err(&client->dev, "%s failed, %d\n",
+				"gpiochip_remove()", ret);
+		return ret;
+	}
+
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver pca9539_driver = {
+	.driver = {
+		.name	= "pca9539",
+	},
+	.probe		= pca9539_probe,
+	.remove		= pca9539_remove,
+};
+
+static int __init pca9539_init(void)
+{
+	return i2c_add_driver(&pca9539_driver);
+}
+module_init(pca9539_init);
+
+static void __exit pca9539_exit(void)
+{
+	i2c_del_driver(&pca9539_driver);
+}
+module_exit(pca9539_exit);
+
+MODULE_AUTHOR("eric miao <eric.miao@marvell.com>");
+MODULE_DESCRIPTION("GPIO expander driver for PCA9539");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/pca9539.h b/include/linux/i2c/pca9539.h
deleted file mode 100644
index 611d84ab7a30..000000000000
--- a/include/linux/i2c/pca9539.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* platform data for the PCA9539 16-bit I/O expander driver */
-
-struct pca9539_platform_data {
-	/* number of the first GPIO */
-	unsigned	gpio_base;
-
-	/* initial polarity inversion setting */
-	uint16_t	invert;
-
-	void		*context;	/* param to setup/teardown */
-
-	int		(*setup)(struct i2c_client *client,
-				unsigned gpio, unsigned ngpio,
-				void *context);
-	int		(*teardown)(struct i2c_client *client,
-				unsigned gpio, unsigned ngpio,
-				void *context);
-};
diff --git a/include/linux/i2c/pca953x.h b/include/linux/i2c/pca953x.h
new file mode 100644
index 000000000000..611d84ab7a30
--- /dev/null
+++ b/include/linux/i2c/pca953x.h
@@ -0,0 +1,18 @@
+/* platform data for the PCA9539 16-bit I/O expander driver */
+
+struct pca9539_platform_data {
+	/* number of the first GPIO */
+	unsigned	gpio_base;
+
+	/* initial polarity inversion setting */
+	uint16_t	invert;
+
+	void		*context;	/* param to setup/teardown */
+
+	int		(*setup)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+	int		(*teardown)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+};
-- 
cgit 


From f3dc3630f687aa4664b663143f69d99d83195c54 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@pengutronix.de>
Date: Wed, 6 Feb 2008 01:39:03 -0800
Subject: gpio: rename pca953x symbols

This second part of an extension to support more pca953x chips renames the C
and Kconfig symbols.  All affected files were updated by sed, except for a
couple of obvious exceptions.  It also updates the Kconfig helptext.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@pengutronix.de>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |  11 +++--
 drivers/gpio/Makefile       |   2 +-
 drivers/gpio/pca953x.c      | 110 ++++++++++++++++++++++----------------------
 include/linux/i2c/pca953x.h |   2 +-
 4 files changed, 63 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 74fac0f5c348..bbd28342e771 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -27,15 +27,16 @@ config DEBUG_GPIO
 
 comment "I2C GPIO expanders:"
 
-config GPIO_PCA9539
-	tristate "PCA9539 16-bit I/O port"
+config GPIO_PCA953X
+	tristate "PCA953x I/O ports"
 	depends on I2C
 	help
-	  Say yes here to support the PCA9539 16-bit I/O port. These
-	  parts are made by NXP and TI.
+	  Say yes here to support the PCA9534 (8-bit), PCA9535 (16-bit),
+	  PCA9536 (4-bit), PCA9537 (4-bit), PCA9538 (8-bit), and PCA9539
+	  (16-bit) I/O ports. These parts are made by NXP and TI.
 
 	  This driver can also be built as a module.  If so, the module
-	  will be called pca9539.
+	  will be called pca953x.
 
 config GPIO_PCF857X
 	tristate "PCF857x, PCA857x, and PCA967x I2C GPIO expanders"
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 16dda772b44b..fdde9923cf33 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -5,5 +5,5 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 obj-$(CONFIG_HAVE_GPIO_LIB)	+= gpiolib.o
 
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
-obj-$(CONFIG_GPIO_PCA9539)	+= pca953x.o
+obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 7fae4e500dfd..ef1fe24bcbaf 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -1,5 +1,5 @@
 /*
- *  pca9539.c - 16-bit I/O port with interrupt and reset
+ *  pca953x.c - 16-bit I/O port with interrupt and reset
  *
  *  Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
  *  Copyright (C) 2007 Marvell International Ltd.
@@ -19,14 +19,14 @@
 #include <asm/gpio.h>
 
 
-#define NR_PCA9539_GPIOS	16
+#define NR_PCA953X_GPIOS	16
 
-#define PCA9539_INPUT		0
-#define PCA9539_OUTPUT		2
-#define PCA9539_INVERT		4
-#define PCA9539_DIRECTION	6
+#define PCA953X_INPUT		0
+#define PCA953X_OUTPUT		2
+#define PCA953X_INVERT		4
+#define PCA953X_DIRECTION	6
 
-struct pca9539_chip {
+struct pca953x_chip {
 	unsigned gpio_start;
 	uint16_t reg_output;
 	uint16_t reg_direction;
@@ -38,7 +38,7 @@ struct pca9539_chip {
 /* NOTE:  we can't currently rely on fault codes to come from SMBus
  * calls, so we map all errors to EIO here and return zero otherwise.
  */
-static int pca9539_write_reg(struct pca9539_chip *chip, int reg, uint16_t val)
+static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
 {
 	if (i2c_smbus_write_word_data(chip->client, reg, val) < 0)
 		return -EIO;
@@ -46,7 +46,7 @@ static int pca9539_write_reg(struct pca9539_chip *chip, int reg, uint16_t val)
 		return 0;
 }
 
-static int pca9539_read_reg(struct pca9539_chip *chip, int reg, uint16_t *val)
+static int pca953x_read_reg(struct pca953x_chip *chip, int reg, uint16_t *val)
 {
 	int ret;
 
@@ -60,16 +60,16 @@ static int pca9539_read_reg(struct pca9539_chip *chip, int reg, uint16_t *val)
 	return 0;
 }
 
-static int pca9539_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+static int pca953x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
 {
-	struct pca9539_chip *chip;
+	struct pca953x_chip *chip;
 	uint16_t reg_val;
 	int ret;
 
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
 	reg_val = chip->reg_direction | (1u << off);
-	ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val);
+	ret = pca953x_write_reg(chip, PCA953X_DIRECTION, reg_val);
 	if (ret)
 		return ret;
 
@@ -77,14 +77,14 @@ static int pca9539_gpio_direction_input(struct gpio_chip *gc, unsigned off)
 	return 0;
 }
 
-static int pca9539_gpio_direction_output(struct gpio_chip *gc,
+static int pca953x_gpio_direction_output(struct gpio_chip *gc,
 		unsigned off, int val)
 {
-	struct pca9539_chip *chip;
+	struct pca953x_chip *chip;
 	uint16_t reg_val;
 	int ret;
 
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
 	/* set output level */
 	if (val)
@@ -92,7 +92,7 @@ static int pca9539_gpio_direction_output(struct gpio_chip *gc,
 	else
 		reg_val = chip->reg_output & ~(1u << off);
 
-	ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val);
+	ret = pca953x_write_reg(chip, PCA953X_OUTPUT, reg_val);
 	if (ret)
 		return ret;
 
@@ -100,7 +100,7 @@ static int pca9539_gpio_direction_output(struct gpio_chip *gc,
 
 	/* then direction */
 	reg_val = chip->reg_direction & ~(1u << off);
-	ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val);
+	ret = pca953x_write_reg(chip, PCA953X_DIRECTION, reg_val);
 	if (ret)
 		return ret;
 
@@ -108,15 +108,15 @@ static int pca9539_gpio_direction_output(struct gpio_chip *gc,
 	return 0;
 }
 
-static int pca9539_gpio_get_value(struct gpio_chip *gc, unsigned off)
+static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off)
 {
-	struct pca9539_chip *chip;
+	struct pca953x_chip *chip;
 	uint16_t reg_val;
 	int ret;
 
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
-	ret = pca9539_read_reg(chip, PCA9539_INPUT, &reg_val);
+	ret = pca953x_read_reg(chip, PCA953X_INPUT, &reg_val);
 	if (ret < 0) {
 		/* NOTE:  diagnostic already emitted; that's all we should
 		 * do unless gpio_*_value_cansleep() calls become different
@@ -128,55 +128,55 @@ static int pca9539_gpio_get_value(struct gpio_chip *gc, unsigned off)
 	return (reg_val & (1u << off)) ? 1 : 0;
 }
 
-static void pca9539_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+static void pca953x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
 {
-	struct pca9539_chip *chip;
+	struct pca953x_chip *chip;
 	uint16_t reg_val;
 	int ret;
 
-	chip = container_of(gc, struct pca9539_chip, gpio_chip);
+	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
 	if (val)
 		reg_val = chip->reg_output | (1u << off);
 	else
 		reg_val = chip->reg_output & ~(1u << off);
 
-	ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val);
+	ret = pca953x_write_reg(chip, PCA953X_OUTPUT, reg_val);
 	if (ret)
 		return;
 
 	chip->reg_output = reg_val;
 }
 
-static int pca9539_init_gpio(struct pca9539_chip *chip)
+static int pca953x_init_gpio(struct pca953x_chip *chip)
 {
 	struct gpio_chip *gc;
 
 	gc = &chip->gpio_chip;
 
-	gc->direction_input  = pca9539_gpio_direction_input;
-	gc->direction_output = pca9539_gpio_direction_output;
-	gc->get = pca9539_gpio_get_value;
-	gc->set = pca9539_gpio_set_value;
+	gc->direction_input  = pca953x_gpio_direction_input;
+	gc->direction_output = pca953x_gpio_direction_output;
+	gc->get = pca953x_gpio_get_value;
+	gc->set = pca953x_gpio_set_value;
 
 	gc->base = chip->gpio_start;
-	gc->ngpio = NR_PCA9539_GPIOS;
-	gc->label = "pca9539";
+	gc->ngpio = NR_PCA953X_GPIOS;
+	gc->label = "pca953x";
 
 	return gpiochip_add(gc);
 }
 
-static int __devinit pca9539_probe(struct i2c_client *client)
+static int __devinit pca953x_probe(struct i2c_client *client)
 {
-	struct pca9539_platform_data *pdata;
-	struct pca9539_chip *chip;
+	struct pca953x_platform_data *pdata;
+	struct pca953x_chip *chip;
 	int ret;
 
 	pdata = client->dev.platform_data;
 	if (pdata == NULL)
 		return -ENODEV;
 
-	chip = kzalloc(sizeof(struct pca9539_chip), GFP_KERNEL);
+	chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL);
 	if (chip == NULL)
 		return -ENOMEM;
 
@@ -187,20 +187,20 @@ static int __devinit pca9539_probe(struct i2c_client *client)
 	/* initialize cached registers from their original values.
 	 * we can't share this chip with another i2c master.
 	 */
-	ret = pca9539_read_reg(chip, PCA9539_OUTPUT, &chip->reg_output);
+	ret = pca953x_read_reg(chip, PCA953X_OUTPUT, &chip->reg_output);
 	if (ret)
 		goto out_failed;
 
-	ret = pca9539_read_reg(chip, PCA9539_DIRECTION, &chip->reg_direction);
+	ret = pca953x_read_reg(chip, PCA953X_DIRECTION, &chip->reg_direction);
 	if (ret)
 		goto out_failed;
 
 	/* set platform specific polarity inversion */
-	ret = pca9539_write_reg(chip, PCA9539_INVERT, pdata->invert);
+	ret = pca953x_write_reg(chip, PCA953X_INVERT, pdata->invert);
 	if (ret)
 		goto out_failed;
 
-	ret = pca9539_init_gpio(chip);
+	ret = pca953x_init_gpio(chip);
 	if (ret)
 		goto out_failed;
 
@@ -219,10 +219,10 @@ out_failed:
 	return ret;
 }
 
-static int pca9539_remove(struct i2c_client *client)
+static int pca953x_remove(struct i2c_client *client)
 {
-	struct pca9539_platform_data *pdata = client->dev.platform_data;
-	struct pca9539_chip *chip = i2c_get_clientdata(client);
+	struct pca953x_platform_data *pdata = client->dev.platform_data;
+	struct pca953x_chip *chip = i2c_get_clientdata(client);
 	int ret = 0;
 
 	if (pdata->teardown) {
@@ -246,26 +246,26 @@ static int pca9539_remove(struct i2c_client *client)
 	return 0;
 }
 
-static struct i2c_driver pca9539_driver = {
+static struct i2c_driver pca953x_driver = {
 	.driver = {
-		.name	= "pca9539",
+		.name	= "pca953x",
 	},
-	.probe		= pca9539_probe,
-	.remove		= pca9539_remove,
+	.probe		= pca953x_probe,
+	.remove		= pca953x_remove,
 };
 
-static int __init pca9539_init(void)
+static int __init pca953x_init(void)
 {
-	return i2c_add_driver(&pca9539_driver);
+	return i2c_add_driver(&pca953x_driver);
 }
-module_init(pca9539_init);
+module_init(pca953x_init);
 
-static void __exit pca9539_exit(void)
+static void __exit pca953x_exit(void)
 {
-	i2c_del_driver(&pca9539_driver);
+	i2c_del_driver(&pca953x_driver);
 }
-module_exit(pca9539_exit);
+module_exit(pca953x_exit);
 
 MODULE_AUTHOR("eric miao <eric.miao@marvell.com>");
-MODULE_DESCRIPTION("GPIO expander driver for PCA9539");
+MODULE_DESCRIPTION("GPIO expander driver for PCA953x");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/pca953x.h b/include/linux/i2c/pca953x.h
index 611d84ab7a30..3c7361217df8 100644
--- a/include/linux/i2c/pca953x.h
+++ b/include/linux/i2c/pca953x.h
@@ -1,6 +1,6 @@
 /* platform data for the PCA9539 16-bit I/O expander driver */
 
-struct pca9539_platform_data {
+struct pca953x_platform_data {
 	/* number of the first GPIO */
 	unsigned	gpio_base;
 
-- 
cgit 


From dfcffa467b4112fa6f1631c9d6bf7759c3bbe75a Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Wed, 6 Feb 2008 01:39:24 -0800
Subject: sm501fb: control panel pin usage with platform data flags

This patch makes it possible to control panel pins usage with flags passed
from the platform data.  Without this patch the sm501fb driver always controls
the VBIASEN and FPEN pins.  The polarity and use of these pins are very
platform specific, so this patch introduces the flags
SM501FB_FLAG_PANEL_USE_VBIASEN and SM501FB_FLAG_PANEL_USE_FPEN which enable
the use of these pins.

This patch is needed to support the a Sharp LQ104V1DG21 lcd panel on SuperH
platforms such as R2D-1 and R2D-PLUS boards.  Letting the sm501fb driver
control the FPEN and VBIASEN pins like today just results in lcd panel
flicker.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sm501fb.c | 41 +++++++++++++++++++++++++----------------
 include/linux/sm501.h   |  2 ++
 2 files changed, 27 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index 58f200c69be3..e1d6085bc347 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -641,6 +641,7 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 {
 	unsigned long control;
 	void __iomem *ctrl_reg = fbi->regs + SM501_DC_PANEL_CONTROL;
+	struct sm501_platdata_fbsub *pd = fbi->pdata->fb_pnl;
 
 	control = readl(ctrl_reg);
 
@@ -657,26 +658,34 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 		sm501fb_sync_regs(fbi);
 		mdelay(10);
 
-		control |= SM501_DC_PANEL_CONTROL_BIAS;	/* VBIASEN */
-		writel(control, ctrl_reg);
-		sm501fb_sync_regs(fbi);
-		mdelay(10);
-
-		control |= SM501_DC_PANEL_CONTROL_FPEN;
-		writel(control, ctrl_reg);
+		if (pd->flags & SM501FB_FLAG_PANEL_USE_VBIASEN) {
+			control |= SM501_DC_PANEL_CONTROL_BIAS;	/* VBIASEN */
+			writel(control, ctrl_reg);
+			sm501fb_sync_regs(fbi);
+			mdelay(10);
+		}
 
+		if (pd->flags & SM501FB_FLAG_PANEL_USE_FPEN) {
+			control |= SM501_DC_PANEL_CONTROL_FPEN;
+			writel(control, ctrl_reg);
+			sm501fb_sync_regs(fbi);
+			mdelay(10);
+		}
 	} else if (!to && (control & SM501_DC_PANEL_CONTROL_VDD) != 0) {
 		/* disable panel power */
+		if (pd->flags & SM501FB_FLAG_PANEL_USE_FPEN) {
+			control &= ~SM501_DC_PANEL_CONTROL_FPEN;
+			writel(control, ctrl_reg);
+			sm501fb_sync_regs(fbi);
+			mdelay(10);
+		}
 
-		control &= ~SM501_DC_PANEL_CONTROL_FPEN;
-		writel(control, ctrl_reg);
-		sm501fb_sync_regs(fbi);
-		mdelay(10);
-
-		control &= ~SM501_DC_PANEL_CONTROL_BIAS;
-		writel(control, ctrl_reg);
-		sm501fb_sync_regs(fbi);
-		mdelay(10);
+		if (pd->flags & SM501FB_FLAG_PANEL_USE_VBIASEN) {
+			control &= ~SM501_DC_PANEL_CONTROL_BIAS;
+			writel(control, ctrl_reg);
+			sm501fb_sync_regs(fbi);
+			mdelay(10);
+		}
 
 		control &= ~SM501_DC_PANEL_CONTROL_DATA;
 		writel(control, ctrl_reg);
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 9e3aaad6fe4d..932a9efee8a5 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -70,6 +70,8 @@ extern unsigned long sm501_gpio_get(struct device *dev,
 #define SM501FB_FLAG_DISABLE_AT_EXIT	(1<<1)
 #define SM501FB_FLAG_USE_HWCURSOR	(1<<2)
 #define SM501FB_FLAG_USE_HWACCEL	(1<<3)
+#define SM501FB_FLAG_PANEL_USE_FPEN	(1<<4)
+#define SM501FB_FLAG_PANEL_USE_VBIASEN	(1<<5)
 
 struct sm501_platdata_fbsub {
 	struct fb_videomode	*def_mode;
-- 
cgit 


From b47490c9bc73d0b34e4c194db40de183e592e446 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 6 Feb 2008 01:39:50 -0800
Subject: md: Update md bitmap during resync.

Currently an md array with a write-intent bitmap does not updated that bitmap
to reflect successful partial resync.  Rather the entire bitmap is updated
when the resync completes.

This is because there is no guarentee that resync requests will complete in
order, and tracking each request individually is unnecessarily burdensome.

However there is value in regularly updating the bitmap, so add code to
periodically pause while all pending sync requests complete, then update the
bitmap.  Doing this only every few seconds (the same as the bitmap update
time) does not notciably affect resync performance.

[snitzer@gmail.com: export bitmap_cond_end_sync]
Signed-off-by: Neil Brown <neilb@suse.de>
Cc: "Mike Snitzer" <snitzer@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/bitmap.c         | 35 ++++++++++++++++++++++++++++++-----
 drivers/md/raid1.c          |  1 +
 drivers/md/raid10.c         |  2 ++
 drivers/md/raid5.c          |  3 +++
 include/linux/raid/bitmap.h |  3 +++
 5 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 1b1ef3130e6e..9231cd700fe8 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1348,14 +1348,38 @@ void bitmap_close_sync(struct bitmap *bitmap)
 	 */
 	sector_t sector = 0;
 	int blocks;
-	if (!bitmap) return;
+	if (!bitmap)
+		return;
 	while (sector < bitmap->mddev->resync_max_sectors) {
 		bitmap_end_sync(bitmap, sector, &blocks, 0);
-/*
-		if (sector < 500) printk("bitmap_close_sync: sec %llu blks %d\n",
-					 (unsigned long long)sector, blocks);
-*/		sector += blocks;
+		sector += blocks;
+	}
+}
+
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
+{
+	sector_t s = 0;
+	int blocks;
+
+	if (!bitmap)
+		return;
+	if (sector == 0) {
+		bitmap->last_end_sync = jiffies;
+		return;
+	}
+	if (time_before(jiffies, (bitmap->last_end_sync
+				  + bitmap->daemon_sleep * HZ)))
+		return;
+	wait_event(bitmap->mddev->recovery_wait,
+		   atomic_read(&bitmap->mddev->recovery_active) == 0);
+
+	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
+	s = 0;
+	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
+		bitmap_end_sync(bitmap, s, &blocks, 0);
+		s += blocks;
 	}
+	bitmap->last_end_sync = jiffies;
 }
 
 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
@@ -1565,3 +1589,4 @@ EXPORT_SYMBOL(bitmap_start_sync);
 EXPORT_SYMBOL(bitmap_end_sync);
 EXPORT_SYMBOL(bitmap_unplug);
 EXPORT_SYMBOL(bitmap_close_sync);
+EXPORT_SYMBOL(bitmap_cond_end_sync);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4a69c416e045..e0b8d0dd7a87 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1684,6 +1684,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
 
+	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 	raise_barrier(conf);
 
 	conf->next_resync = sector_nr;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5cdcc9386200..ba125277c6c4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1670,6 +1670,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
 
+	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+
 	/* Again, very different code for resync and recovery.
 	 * Both must result in an r10bio with a list of bios that
 	 * have bi_end_io, bi_sector, bi_bdev set,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e8c8157b02fc..388a974d63ef 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3753,6 +3753,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 		return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
 	}
 
+
+	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+
 	pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks);
 	sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
 	if (sh == NULL) {
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index 306a1d1a5af0..e51b531cd0b2 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -244,6 +244,8 @@ struct bitmap {
 	 */
 	unsigned long daemon_lastrun; /* jiffies of last run */
 	unsigned long daemon_sleep; /* how many seconds between updates? */
+	unsigned long last_end_sync; /* when we lasted called end_sync to
+				      * update bitmap with resync progress */
 
 	atomic_t pending_writes; /* pending writes to the bitmap file */
 	wait_queue_head_t write_wait;
@@ -275,6 +277,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int degraded);
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
 void bitmap_close_sync(struct bitmap *bitmap);
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
 
 void bitmap_unplug(struct bitmap *bitmap);
 void bitmap_daemon_work(struct bitmap *bitmap);
-- 
cgit 


From e691063a61f7f72a7d2882eb744b07a520cde23b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 6 Feb 2008 01:39:51 -0800
Subject: md: support 'external' metadata for md arrays

- Add a state flag 'external' to indicate that the metadata is managed
  externally (by user-space) so important changes need to be
  left of user-space to handle.
  Alternates are non-persistant ('none') where there is no stable metadata -
  after the  array is stopped there is no record of it's status - and
  internal which can be version 0.90 or version 1.x
  These are selected by writing to the 'metadata' attribute.

- move the updating of superblocks (sync_sbs) to after we have checked if
  there are any superblocks or not.

- New array state 'write_pending'.  This means that the metadata records
  the array as 'clean', but a write has been requested, so the metadata has
  to be updated to record a 'dirty' array before the write can continue.
  This change is reported to md by writing 'active' to the array_state
  attribute.

- tidy up marking of sb_dirty:
   - don't set sb_dirty when resync finishes as md_check_recovery
     calls md_update_sb when the sync thread finishes anyway.
   - Don't set sb_dirty in multipath_run as the array might not be dirty.
   - don't mark superblock dirty when switching to 'clean' if there
     is no internal superblock (if external, userspace can choose to
     update the superblock whenever it chooses to).

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c           | 77 +++++++++++++++++++++++++++++++++++------------
 include/linux/raid/md_k.h |  3 ++
 2 files changed, 61 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index c28a120b4161..e2782a04012d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -778,7 +778,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->major_version = 0;
 		mddev->minor_version = sb->minor_version;
 		mddev->patch_version = sb->patch_version;
-		mddev->persistent = ! sb->not_persistent;
+		mddev->persistent = 1;
+		mddev->external = 0;
 		mddev->chunk_size = sb->chunk_size;
 		mddev->ctime = sb->ctime;
 		mddev->utime = sb->utime;
@@ -904,7 +905,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->size  = mddev->size;
 	sb->raid_disks = mddev->raid_disks;
 	sb->md_minor = mddev->md_minor;
-	sb->not_persistent = !mddev->persistent;
+	sb->not_persistent = 0;
 	sb->utime = mddev->utime;
 	sb->state = 0;
 	sb->events_hi = (mddev->events>>32);
@@ -1158,6 +1159,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->major_version = 1;
 		mddev->patch_version = 0;
 		mddev->persistent = 1;
+		mddev->external = 0;
 		mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
 		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
 		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
@@ -1696,18 +1698,20 @@ repeat:
 		MD_BUG();
 		mddev->events --;
 	}
-	sync_sbs(mddev, nospares);
 
 	/*
 	 * do not write anything to disk if using
 	 * nonpersistent superblocks
 	 */
 	if (!mddev->persistent) {
-		clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+		if (!mddev->external)
+			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+
 		spin_unlock_irq(&mddev->write_lock);
 		wake_up(&mddev->sb_wait);
 		return;
 	}
+	sync_sbs(mddev, nospares);
 	spin_unlock_irq(&mddev->write_lock);
 
 	dprintk(KERN_INFO 
@@ -2425,6 +2429,8 @@ array_state_show(mddev_t *mddev, char *page)
 		case 0:
 			if (mddev->in_sync)
 				st = clean;
+			else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
+				st = write_pending;
 			else if (mddev->safemode)
 				st = active_idle;
 			else
@@ -2455,11 +2461,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 		break;
 	case clear:
 		/* stopping an active array */
-		if (mddev->pers) {
-			if (atomic_read(&mddev->active) > 1)
-				return -EBUSY;
-			err = do_md_stop(mddev, 0);
-		}
+		if (atomic_read(&mddev->active) > 1)
+			return -EBUSY;
+		err = do_md_stop(mddev, 0);
 		break;
 	case inactive:
 		/* stopping an active array */
@@ -2467,7 +2471,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 			if (atomic_read(&mddev->active) > 1)
 				return -EBUSY;
 			err = do_md_stop(mddev, 2);
-		}
+		} else
+			err = 0; /* already inactive */
 		break;
 	case suspended:
 		break; /* not supported yet */
@@ -2495,9 +2500,15 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 			restart_array(mddev);
 			spin_lock_irq(&mddev->write_lock);
 			if (atomic_read(&mddev->writes_pending) == 0) {
-				mddev->in_sync = 1;
-				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
-			}
+				if (mddev->in_sync == 0) {
+					mddev->in_sync = 1;
+					if (mddev->persistent)
+						set_bit(MD_CHANGE_CLEAN,
+							&mddev->flags);
+				}
+				err = 0;
+			} else
+				err = -EBUSY;
 			spin_unlock_irq(&mddev->write_lock);
 		} else {
 			mddev->ro = 0;
@@ -2508,7 +2519,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 	case active:
 		if (mddev->pers) {
 			restart_array(mddev);
-			clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
+			if (mddev->external)
+				clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
 			wake_up(&mddev->sb_wait);
 			err = 0;
 		} else {
@@ -2659,7 +2671,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
 
 
 /* Metdata version.
- * This is either 'none' for arrays with externally managed metadata,
+ * This is one of
+ *   'none' for arrays with no metadata (good luck...)
+ *   'external' for arrays with externally managed metadata,
  * or N.M for internally known formats
  */
 static ssize_t
@@ -2668,6 +2682,8 @@ metadata_show(mddev_t *mddev, char *page)
 	if (mddev->persistent)
 		return sprintf(page, "%d.%d\n",
 			       mddev->major_version, mddev->minor_version);
+	else if (mddev->external)
+		return sprintf(page, "external:%s\n", mddev->metadata_type);
 	else
 		return sprintf(page, "none\n");
 }
@@ -2682,6 +2698,21 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
 
 	if (cmd_match(buf, "none")) {
 		mddev->persistent = 0;
+		mddev->external = 0;
+		mddev->major_version = 0;
+		mddev->minor_version = 90;
+		return len;
+	}
+	if (strncmp(buf, "external:", 9) == 0) {
+		int namelen = len-9;
+		if (namelen >= sizeof(mddev->metadata_type))
+			namelen = sizeof(mddev->metadata_type)-1;
+		strncpy(mddev->metadata_type, buf+9, namelen);
+		mddev->metadata_type[namelen] = 0;
+		if (namelen && mddev->metadata_type[namelen-1] == '\n')
+			mddev->metadata_type[--namelen] = 0;
+		mddev->persistent = 0;
+		mddev->external = 1;
 		mddev->major_version = 0;
 		mddev->minor_version = 90;
 		return len;
@@ -2698,6 +2729,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
 	mddev->major_version = major;
 	mddev->minor_version = minor;
 	mddev->persistent = 1;
+	mddev->external = 0;
 	return len;
 }
 
@@ -3524,6 +3556,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
 		mddev->raid_disks = 0;
 		mddev->recovery_cp = 0;
 		mddev->reshape_position = MaxSector;
+		mddev->external = 0;
 
 	} else if (mddev->pers)
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -4165,13 +4198,15 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	else
 		mddev->recovery_cp = 0;
 	mddev->persistent    = ! info->not_persistent;
+	mddev->external	     = 0;
 
 	mddev->layout        = info->layout;
 	mddev->chunk_size    = info->chunk_size;
 
 	mddev->max_disks     = MD_SB_DISKS;
 
-	mddev->flags         = 0;
+	if (mddev->persistent)
+		mddev->flags         = 0;
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -4982,7 +5017,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
 					   mddev->major_version,
 					   mddev->minor_version);
 			}
-		} else
+		} else if (mddev->external)
+			seq_printf(seq, " super external:%s",
+				   mddev->metadata_type);
+		else
 			seq_printf(seq, " super non-persistent");
 
 		if (mddev->pers) {
@@ -5589,7 +5627,7 @@ void md_check_recovery(mddev_t *mddev)
 	}
 
 	if ( ! (
-		mddev->flags ||
+		(mddev->flags && !mddev->external) ||
 		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
 		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
 		(mddev->safemode == 1) ||
@@ -5605,7 +5643,8 @@ void md_check_recovery(mddev_t *mddev)
 		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
 		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
 			mddev->in_sync = 1;
-			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+			if (mddev->persistent)
+				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
 		}
 		if (mddev->safemode == 1)
 			mddev->safemode = 0;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index dcb729244f47..b579cc628303 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -130,6 +130,9 @@ struct mddev_s
 					minor_version,
 					patch_version;
 	int				persistent;
+	int 				external;	/* metadata is
+							 * managed externally */
+	char				metadata_type[17]; /* externally set*/
 	int				chunk_size;
 	time_t				ctime, utime;
 	int				level, layout;
-- 
cgit 


From c620727779f7cc8ea96efb71f0651a26349e59c1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 6 Feb 2008 01:39:52 -0800
Subject: md: allow a maximum extent to be set for resyncing

This allows userspace to control resync/reshape progress and synchronise it
with other activities, such as shared access in a SAN, or backing up critical
sections during a tricky reshape.

Writing a number of sectors (which must be a multiple of the chunk size if
such is meaningful) causes a resync to pause when it gets to that point.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/md.txt      | 10 +++++++
 drivers/md/md.c           | 75 ++++++++++++++++++++++++++++++++++++++++-------
 drivers/md/raid1.c        |  2 ++
 drivers/md/raid10.c       |  3 ++
 drivers/md/raid5.c        | 25 ++++++++++++++++
 include/linux/raid/md_k.h |  2 ++
 6 files changed, 107 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/md.txt b/Documentation/md.txt
index 5818628207b5..396cdd982c26 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -416,6 +416,16 @@ also have
      sectors in total that could need to be processed.  The two
      numbers are separated by a '/'  thus effectively showing one
      value, a fraction of the process that is complete.
+     A 'select' on this attribute will return when resync completes,
+     when it reaches the current sync_max (below) and possibly at
+     other times.
+
+   sync_max
+     This is a number of sectors at which point a resync/recovery
+     process will pause.  When a resync is active, the value can
+     only ever be increased, never decreased.  The value of 'max'
+     effectively disables the limit.
+
 
    sync_speed
      This shows the current actual speed, in K/sec, of the current
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 00788c56276f..79eb63fdb4b3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit)
 	spin_lock_init(&new->write_lock);
 	init_waitqueue_head(&new->sb_wait);
 	new->reshape_position = MaxSector;
+	new->resync_max = MaxSector;
 
 	new->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!new->queue) {
@@ -2920,6 +2921,43 @@ sync_completed_show(mddev_t *mddev, char *page)
 
 static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
 
+static ssize_t
+max_sync_show(mddev_t *mddev, char *page)
+{
+	if (mddev->resync_max == MaxSector)
+		return sprintf(page, "max\n");
+	else
+		return sprintf(page, "%llu\n",
+			       (unsigned long long)mddev->resync_max);
+}
+static ssize_t
+max_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	if (strncmp(buf, "max", 3) == 0)
+		mddev->resync_max = MaxSector;
+	else {
+		char *ep;
+		unsigned long long max = simple_strtoull(buf, &ep, 10);
+		if (ep == buf || (*ep != 0 && *ep != '\n'))
+			return -EINVAL;
+		if (max < mddev->resync_max &&
+		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+			return -EBUSY;
+
+		/* Must be a multiple of chunk_size */
+		if (mddev->chunk_size) {
+			if (max & (sector_t)((mddev->chunk_size>>9)-1))
+				return -EINVAL;
+		}
+		mddev->resync_max = max;
+	}
+	wake_up(&mddev->recovery_wait);
+	return len;
+}
+
+static struct md_sysfs_entry md_max_sync =
+__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
+
 static ssize_t
 suspend_lo_show(mddev_t *mddev, char *page)
 {
@@ -3030,6 +3068,7 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_sync_max.attr,
 	&md_sync_speed.attr,
 	&md_sync_completed.attr,
+	&md_max_sync.attr,
 	&md_suspend_lo.attr,
 	&md_suspend_hi.attr,
 	&md_bitmap.attr,
@@ -3579,6 +3618,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
 		mddev->size = 0;
 		mddev->raid_disks = 0;
 		mddev->recovery_cp = 0;
+		mddev->resync_max = MaxSector;
 		mddev->reshape_position = MaxSector;
 		mddev->external = 0;
 
@@ -5443,8 +5483,16 @@ void md_do_sync(mddev_t *mddev)
 		sector_t sectors;
 
 		skipped = 0;
+		if (j >= mddev->resync_max) {
+			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+			wait_event(mddev->recovery_wait,
+				   mddev->resync_max > j
+				   || kthread_should_stop());
+		}
+		if (kthread_should_stop())
+			goto interrupted;
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
-					    currspeed < speed_min(mddev));
+						  currspeed < speed_min(mddev));
 		if (sectors == 0) {
 			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
 			goto out;
@@ -5486,15 +5534,9 @@ void md_do_sync(mddev_t *mddev)
 		}
 
 
-		if (kthread_should_stop()) {
-			/*
-			 * got a signal, exit.
-			 */
-			printk(KERN_INFO 
-				"md: md_do_sync() got signal ... exiting\n");
-			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			goto out;
-		}
+		if (kthread_should_stop())
+			goto interrupted;
+
 
 		/*
 		 * this loop exits only if either when we are slower than
@@ -5558,9 +5600,22 @@ void md_do_sync(mddev_t *mddev)
 
  skip:
 	mddev->curr_resync = 0;
+	mddev->resync_max = MaxSector;
+	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	wake_up(&resync_wait);
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
+	return;
+
+ interrupted:
+	/*
+	 * got a signal, exit.
+	 */
+	printk(KERN_INFO
+	       "md: md_do_sync() got signal ... exiting\n");
+	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+	goto out;
+
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e0b8d0dd7a87..ae7c15207df5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1767,6 +1767,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return rv;
 	}
 
+	if (max_sector > mddev->resync_max)
+		max_sector = mddev->resync_max; /* Don't do IO beyond here */
 	nr_sectors = 0;
 	sync_blocks = 0;
 	do {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ba125277c6c4..d6f12882424d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1657,6 +1657,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return (max_sector - sector_nr) + sectors_skipped;
 	}
 
+	if (max_sector > mddev->resync_max)
+		max_sector = mddev->resync_max; /* Don't do IO beyond here */
+
 	/* make sure whole request will fit in a chunk - if chunks
 	 * are meaningful
 	 */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 388a974d63ef..e946de6f46bc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3698,6 +3698,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 		release_stripe(sh);
 		first_sector += STRIPE_SECTORS;
 	}
+	/* If this takes us to the resync_max point where we have to pause,
+	 * then we need to write out the superblock.
+	 */
+	sector_nr += conf->chunk_size>>9;
+	if (sector_nr >= mddev->resync_max) {
+		/* Cannot proceed until we've updated the superblock... */
+		wait_event(conf->wait_for_overlap,
+			   atomic_read(&conf->reshape_stripes) == 0);
+		mddev->reshape_position = conf->expand_progress;
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		md_wakeup_thread(mddev->thread);
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_CHANGE_DEVS, &mddev->flags)
+			   || kthread_should_stop());
+		spin_lock_irq(&conf->device_lock);
+		conf->expand_lo = mddev->reshape_position;
+		spin_unlock_irq(&conf->device_lock);
+		wake_up(&conf->wait_for_overlap);
+	}
 	return conf->chunk_size>>9;
 }
 
@@ -3734,6 +3753,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		return reshape_request(mddev, sector_nr, skipped);
 
+	/* No need to check resync_max as we never do more than one
+	 * stripe, and as resync_max will always be on a chunk boundary,
+	 * if the check in md_do_sync didn't fire, there is no chance
+	 * of overstepping resync_max here
+	 */
+
 	/* if there is too many failed drives and we are trying
 	 * to resync, then assert that we are finished, because there is
 	 * nothing we can do.
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index b579cc628303..c77dca3221ed 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -219,6 +219,8 @@ struct mddev_s
 	atomic_t			recovery_active; /* blocks scheduled, but not written */
 	wait_queue_head_t		recovery_wait;
 	sector_t			recovery_cp;
+	sector_t			resync_max;	/* resync should pause
+							 * when it gets here */
 
 	spinlock_t			write_lock;
 	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
-- 
cgit 


From c5d79adba7ced41d7ac097c2ab74759d10522dd5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 6 Feb 2008 01:39:54 -0800
Subject: md: allow devices to be shared between md arrays

Currently, a given device is "claimed" by a particular array so that it cannot
be used by other arrays.

This is not ideal for DDF and other metadata schemes which have their own
partitioning concept.

So for externally managed metadata, just claim the device for md in general,
require that "offset" and "size" are set properly for each device, and make
sure that if a device is included in different arrays then the active sections
do not overlap.

This involves adding another flag to the rdev which makes it awkward to set
"->flags = 0" to clear certain flags.  So now clear flags explicitly by name
when we want to clear things.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c           | 88 +++++++++++++++++++++++++++++++++++++++++------
 include/linux/raid/md_k.h |  2 ++
 2 files changed, 80 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 78fe3e97ff99..7c9a87b02e77 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -774,7 +774,11 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	__u64 ev1 = md_event(sb);
 
 	rdev->raid_disk = -1;
-	rdev->flags = 0;
+	clear_bit(Faulty, &rdev->flags);
+	clear_bit(In_sync, &rdev->flags);
+	clear_bit(WriteMostly, &rdev->flags);
+	clear_bit(BarriersNotsupp, &rdev->flags);
+
 	if (mddev->raid_disks == 0) {
 		mddev->major_version = 0;
 		mddev->minor_version = sb->minor_version;
@@ -1154,7 +1158,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	__u64 ev1 = le64_to_cpu(sb->events);
 
 	rdev->raid_disk = -1;
-	rdev->flags = 0;
+	clear_bit(Faulty, &rdev->flags);
+	clear_bit(In_sync, &rdev->flags);
+	clear_bit(WriteMostly, &rdev->flags);
+	clear_bit(BarriersNotsupp, &rdev->flags);
+
 	if (mddev->raid_disks == 0) {
 		mddev->major_version = 1;
 		mddev->patch_version = 0;
@@ -1402,7 +1410,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 		goto fail;
 	}
 	list_add(&rdev->same_set, &mddev->disks);
-	bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
+	bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
 	return 0;
 
  fail:
@@ -1442,7 +1450,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
  * otherwise reused by a RAID array (or any other kernel
  * subsystem), by bd_claiming the device.
  */
-static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
+static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
 {
 	int err = 0;
 	struct block_device *bdev;
@@ -1454,13 +1462,15 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 			__bdevname(dev, b));
 		return PTR_ERR(bdev);
 	}
-	err = bd_claim(bdev, rdev);
+	err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
 	if (err) {
 		printk(KERN_ERR "md: could not bd_claim %s.\n",
 			bdevname(bdev, b));
 		blkdev_put(bdev);
 		return err;
 	}
+	if (!shared)
+		set_bit(AllReserved, &rdev->flags);
 	rdev->bdev = bdev;
 	return err;
 }
@@ -1925,7 +1935,8 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			return -ENOSPC;
 		rdev->raid_disk = slot;
 		/* assume it is working */
-		rdev->flags = 0;
+		clear_bit(Faulty, &rdev->flags);
+		clear_bit(WriteMostly, &rdev->flags);
 		set_bit(In_sync, &rdev->flags);
 	}
 	return len;
@@ -1950,6 +1961,10 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		return -EINVAL;
 	if (rdev->mddev->pers)
 		return -EBUSY;
+	if (rdev->size && rdev->mddev->external)
+		/* Must set offset before size, so overlap checks
+		 * can be sane */
+		return -EBUSY;
 	rdev->data_offset = offset;
 	return len;
 }
@@ -1963,16 +1978,69 @@ rdev_size_show(mdk_rdev_t *rdev, char *page)
 	return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
 }
 
+static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
+{
+	/* check if two start/length pairs overlap */
+	if (s1+l1 <= s2)
+		return 0;
+	if (s2+l2 <= s1)
+		return 0;
+	return 1;
+}
+
 static ssize_t
 rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
 	char *e;
 	unsigned long long size = simple_strtoull(buf, &e, 10);
+	unsigned long long oldsize = rdev->size;
 	if (e==buf || (*e && *e != '\n'))
 		return -EINVAL;
 	if (rdev->mddev->pers)
 		return -EBUSY;
 	rdev->size = size;
+	if (size > oldsize && rdev->mddev->external) {
+		/* need to check that all other rdevs with the same ->bdev
+		 * do not overlap.  We need to unlock the mddev to avoid
+		 * a deadlock.  We have already changed rdev->size, and if
+		 * we have to change it back, we will have the lock again.
+		 */
+		mddev_t *mddev;
+		int overlap = 0;
+		struct list_head *tmp, *tmp2;
+
+		mddev_unlock(rdev->mddev);
+		ITERATE_MDDEV(mddev, tmp) {
+			mdk_rdev_t *rdev2;
+
+			mddev_lock(mddev);
+			ITERATE_RDEV(mddev, rdev2, tmp2)
+				if (test_bit(AllReserved, &rdev2->flags) ||
+				    (rdev->bdev == rdev2->bdev &&
+				     rdev != rdev2 &&
+				     overlaps(rdev->data_offset, rdev->size,
+					    rdev2->data_offset, rdev2->size))) {
+					overlap = 1;
+					break;
+				}
+			mddev_unlock(mddev);
+			if (overlap) {
+				mddev_put(mddev);
+				break;
+			}
+		}
+		mddev_lock(rdev->mddev);
+		if (overlap) {
+			/* Someone else could have slipped in a size
+			 * change here, but doing so is just silly.
+			 * We put oldsize back because we *know* it is
+			 * safe, and trust userspace not to race with
+			 * itself
+			 */
+			rdev->size = oldsize;
+			return -EBUSY;
+		}
+	}
 	if (size < rdev->mddev->size || rdev->mddev->size == 0)
 		rdev->mddev->size = size;
 	return len;
@@ -2056,7 +2124,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
 	if ((err = alloc_disk_sb(rdev)))
 		goto abort_free;
 
-	err = lock_rdev(rdev, newdev);
+	err = lock_rdev(rdev, newdev, super_format == -2);
 	if (err)
 		goto abort_free;
 
@@ -2609,7 +2677,9 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
 			if (err < 0)
 				goto out;
 		}
-	} else
+	} else if (mddev->external)
+		rdev = md_import_device(dev, -2, -1);
+	else
 		rdev = md_import_device(dev, -1, -1);
 
 	if (IS_ERR(rdev))
@@ -4019,8 +4089,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 		else
 			rdev->raid_disk = -1;
 
-		rdev->flags = 0;
-
 		if (rdev->raid_disk < mddev->raid_disks)
 			if (info->state & (1<<MD_DISK_SYNC))
 				set_bit(In_sync, &rdev->flags);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index c77dca3221ed..5b2102e40286 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -81,6 +81,8 @@ struct mdk_rdev_s
 #define	In_sync		2		/* device is in_sync with rest of array */
 #define	WriteMostly	4		/* Avoid reading if at all possible */
 #define	BarriersNotsupp	5		/* BIO_RW_BARRIER is not supported */
+#define	AllReserved	6		/* If whole device is reserved for
+					 * one array */
 
 	int desc_nr;			/* descriptor index in the superblock */
 	int raid_disk;			/* role of device in array */
-- 
cgit 


From d089c6af10c2be5988f03667d6d22fe6085fbe5e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 6 Feb 2008 01:39:59 -0800
Subject: md: change ITERATE_RDEV to rdev_for_each

As this is more in line with common practice in the kernel.  Also swap the
args around to be more like list_for_each.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/bitmap.c       |  4 +--
 drivers/md/faulty.c       |  2 +-
 drivers/md/linear.c       |  2 +-
 drivers/md/md.c           | 64 +++++++++++++++++++++++------------------------
 drivers/md/multipath.c    |  2 +-
 drivers/md/raid0.c        |  8 +++---
 drivers/md/raid1.c        |  2 +-
 drivers/md/raid10.c       |  2 +-
 drivers/md/raid5.c        |  6 ++---
 include/linux/raid/md_k.h |  2 +-
 10 files changed, 47 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9231cd700fe8..a0585fb6da94 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -237,7 +237,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	ITERATE_RDEV(mddev, rdev, tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (! test_bit(In_sync, &rdev->flags)
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
@@ -261,7 +261,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 	struct list_head *tmp;
 	mddev_t *mddev = bitmap->mddev;
 
-	ITERATE_RDEV(mddev, rdev, tmp)
+	rdev_for_each(rdev, tmp, mddev)
 		if (test_bit(In_sync, &rdev->flags)
 		    && !test_bit(Faulty, &rdev->flags)) {
 			int size = PAGE_SIZE;
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index cf2ddce34118..d107ddceefcd 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -294,7 +294,7 @@ static int run(mddev_t *mddev)
 	}
 	conf->nfaults = 0;
 
-	ITERATE_RDEV(mddev, rdev, tmp)
+	rdev_for_each(rdev, tmp, mddev)
 		conf->rdev = rdev;
 
 	mddev->array_size = mddev->size;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 3dac1cfb8189..0b8511776b3e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -122,7 +122,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	cnt = 0;
 	conf->array_size = 0;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		int j = rdev->raid_disk;
 		dev_info_t *disk = conf->disks + j;
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c912ce2752a4..e02022864f08 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -311,7 +311,7 @@ static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
 	mdk_rdev_t * rdev;
 	struct list_head *tmp;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev->desc_nr == nr)
 			return rdev;
 	}
@@ -323,7 +323,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
 	struct list_head *tmp;
 	mdk_rdev_t *rdev;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev->bdev->bd_dev == dev)
 			return rdev;
 	}
@@ -943,7 +943,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->state |= (1<<MD_SB_BITMAP_PRESENT);
 
 	sb->disks[0].state = (1<<MD_DISK_REMOVED);
-	ITERATE_RDEV(mddev,rdev2,tmp) {
+	rdev_for_each(rdev2, tmp, mddev) {
 		mdp_disk_t *d;
 		int desc_nr;
 		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
@@ -1295,7 +1295,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	}
 
 	max_dev = 0;
-	ITERATE_RDEV(mddev,rdev2,tmp)
+	rdev_for_each(rdev2, tmp, mddev)
 		if (rdev2->desc_nr+1 > max_dev)
 			max_dev = rdev2->desc_nr+1;
 
@@ -1304,7 +1304,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	for (i=0; i<max_dev;i++)
 		sb->dev_roles[i] = cpu_to_le16(0xfffe);
 	
-	ITERATE_RDEV(mddev,rdev2,tmp) {
+	rdev_for_each(rdev2, tmp, mddev) {
 		i = rdev2->desc_nr;
 		if (test_bit(Faulty, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1342,8 +1342,8 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 	struct list_head *tmp, *tmp2;
 	mdk_rdev_t *rdev, *rdev2;
 
-	ITERATE_RDEV(mddev1,rdev,tmp)
-		ITERATE_RDEV(mddev2, rdev2, tmp2)
+	rdev_for_each(rdev, tmp, mddev1)
+		rdev_for_each(rdev2, tmp2, mddev2)
 			if (rdev->bdev->bd_contains ==
 			    rdev2->bdev->bd_contains)
 				return 1;
@@ -1516,7 +1516,7 @@ static void export_array(mddev_t *mddev)
 	struct list_head *tmp;
 	mdk_rdev_t *rdev;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (!rdev->mddev) {
 			MD_BUG();
 			continue;
@@ -1600,11 +1600,11 @@ static void md_print_devices(void)
 			bitmap_print_sb(mddev->bitmap);
 		else
 			printk("%s: ", mdname(mddev));
-		ITERATE_RDEV(mddev,rdev,tmp2)
+		rdev_for_each(rdev, tmp2, mddev)
 			printk("<%s>", bdevname(rdev->bdev,b));
 		printk("\n");
 
-		ITERATE_RDEV(mddev,rdev,tmp2)
+		rdev_for_each(rdev, tmp2, mddev)
 			print_rdev(rdev);
 	}
 	printk("md:	**********************************\n");
@@ -1623,7 +1623,7 @@ static void sync_sbs(mddev_t * mddev, int nospares)
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev->sb_events == mddev->events ||
 		    (nospares &&
 		     rdev->raid_disk < 0 &&
@@ -1730,7 +1730,7 @@ repeat:
 		mdname(mddev),mddev->in_sync);
 
 	bitmap_update_sb(mddev->bitmap);
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		char b[BDEVNAME_SIZE];
 		dprintk(KERN_INFO "md: ");
 		if (rdev->sb_loaded != 1)
@@ -2016,7 +2016,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			mdk_rdev_t *rdev2;
 
 			mddev_lock(mddev);
-			ITERATE_RDEV(mddev, rdev2, tmp2)
+			rdev_for_each(rdev2, tmp2, mddev)
 				if (test_bit(AllReserved, &rdev2->flags) ||
 				    (rdev->bdev == rdev2->bdev &&
 				     rdev != rdev2 &&
@@ -2202,7 +2202,7 @@ static void analyze_sbs(mddev_t * mddev)
 	char b[BDEVNAME_SIZE];
 
 	freshest = NULL;
-	ITERATE_RDEV(mddev,rdev,tmp)
+	rdev_for_each(rdev, tmp, mddev)
 		switch (super_types[mddev->major_version].
 			load_super(rdev, freshest, mddev->minor_version)) {
 		case 1:
@@ -2223,7 +2223,7 @@ static void analyze_sbs(mddev_t * mddev)
 		validate_super(mddev, freshest);
 
 	i = 0;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev != freshest)
 			if (super_types[mddev->major_version].
 			    validate_super(mddev, rdev)) {
@@ -3317,7 +3317,7 @@ static int do_md_run(mddev_t * mddev)
 		}
 
 		/* devices must have minimum size of one chunk */
-		ITERATE_RDEV(mddev,rdev,tmp) {
+		rdev_for_each(rdev, tmp, mddev) {
 			if (test_bit(Faulty, &rdev->flags))
 				continue;
 			if (rdev->size < chunk_size / 1024) {
@@ -3344,7 +3344,7 @@ static int do_md_run(mddev_t * mddev)
 	 * the only valid external interface is through the md
 	 * device.
 	 */
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (test_bit(Faulty, &rdev->flags))
 			continue;
 		sync_blockdev(rdev->bdev);
@@ -3410,8 +3410,8 @@ static int do_md_run(mddev_t * mddev)
 		mdk_rdev_t *rdev2;
 		struct list_head *tmp2;
 		int warned = 0;
-		ITERATE_RDEV(mddev, rdev, tmp) {
-			ITERATE_RDEV(mddev, rdev2, tmp2) {
+		rdev_for_each(rdev, tmp, mddev) {
+			rdev_for_each(rdev2, tmp2, mddev) {
 				if (rdev < rdev2 &&
 				    rdev->bdev->bd_contains ==
 				    rdev2->bdev->bd_contains) {
@@ -3471,7 +3471,7 @@ static int do_md_run(mddev_t * mddev)
 	mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
 	mddev->in_sync = 1;
 
-	ITERATE_RDEV(mddev,rdev,tmp)
+	rdev_for_each(rdev, tmp, mddev)
 		if (rdev->raid_disk >= 0) {
 			char nm[20];
 			sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3504,7 +3504,7 @@ static int do_md_run(mddev_t * mddev)
 	if (mddev->degraded && !mddev->sync_thread) {
 		struct list_head *rtmp;
 		int spares = 0;
-		ITERATE_RDEV(mddev,rdev,rtmp)
+		rdev_for_each(rdev, rtmp, mddev)
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(In_sync, &rdev->flags) &&
 			    !test_bit(Faulty, &rdev->flags))
@@ -3681,7 +3681,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
 		}
 		mddev->bitmap_offset = 0;
 
-		ITERATE_RDEV(mddev,rdev,tmp)
+		rdev_for_each(rdev, tmp, mddev)
 			if (rdev->raid_disk >= 0) {
 				char nm[20];
 				sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3723,7 +3723,7 @@ static void autorun_array(mddev_t *mddev)
 
 	printk(KERN_INFO "md: running: ");
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		char b[BDEVNAME_SIZE];
 		printk("<%s>", bdevname(rdev->bdev,b));
 	}
@@ -3851,7 +3851,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
 	struct list_head *tmp;
 
 	nr=working=active=failed=spare=0;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		nr++;
 		if (test_bit(Faulty, &rdev->flags))
 			failed++;
@@ -4391,7 +4391,7 @@ static int update_size(mddev_t *mddev, unsigned long size)
 	 */
 	if (mddev->sync_thread)
 		return -EBUSY;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		sector_t avail;
 		avail = rdev->size * 2;
 
@@ -5132,7 +5132,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		}
 
 		size = 0;
-		ITERATE_RDEV(mddev,rdev,tmp2) {
+		rdev_for_each(rdev, tmp2, mddev) {
 			char b[BDEVNAME_SIZE];
 			seq_printf(seq, " %s[%d]",
 				bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -5288,7 +5288,7 @@ static int is_mddev_idle(mddev_t *mddev)
 	long curr_events;
 
 	idle = 1;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
 		curr_events = disk_stat_read(disk, sectors[0]) + 
 				disk_stat_read(disk, sectors[1]) - 
@@ -5515,7 +5515,7 @@ void md_do_sync(mddev_t *mddev)
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->size << 1;
 		j = MaxSector;
-		ITERATE_RDEV(mddev,rdev,rtmp)
+		rdev_for_each(rdev, rtmp, mddev)
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(Faulty, &rdev->flags) &&
 			    !test_bit(In_sync, &rdev->flags) &&
@@ -5668,7 +5668,7 @@ void md_do_sync(mddev_t *mddev)
 		} else {
 			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 				mddev->curr_resync = MaxSector;
-			ITERATE_RDEV(mddev,rdev,rtmp)
+			rdev_for_each(rdev, rtmp, mddev)
 				if (rdev->raid_disk >= 0 &&
 				    !test_bit(Faulty, &rdev->flags) &&
 				    !test_bit(In_sync, &rdev->flags) &&
@@ -5706,7 +5706,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 	struct list_head *rtmp;
 	int spares = 0;
 
-	ITERATE_RDEV(mddev,rdev,rtmp)
+	rdev_for_each(rdev, rtmp, mddev)
 		if (rdev->raid_disk >= 0 &&
 		    !mddev->external &&
 		    (test_bit(Faulty, &rdev->flags) ||
@@ -5722,7 +5722,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 		}
 
 	if (mddev->degraded) {
-		ITERATE_RDEV(mddev,rdev,rtmp)
+		rdev_for_each(rdev, rtmp, mddev)
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
 				rdev->recovery_offset = 0;
@@ -5836,7 +5836,7 @@ void md_check_recovery(mddev_t *mddev)
 			 * information must be scrapped
 			 */
 			if (!mddev->degraded)
-				ITERATE_RDEV(mddev,rdev,rtmp)
+				rdev_for_each(rdev, rtmp, mddev)
 					rdev->saved_raid_disk = -1;
 
 			mddev->recovery = 0;
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index eb631ebed686..3f299d835a2b 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -436,7 +436,7 @@ static int multipath_run (mddev_t *mddev)
 	}
 
 	conf->working_disks = 0;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		disk_idx = rdev->raid_disk;
 		if (disk_idx < 0 ||
 		    disk_idx >= mddev->raid_disks)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f8e591708d1f..818b48284096 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -72,11 +72,11 @@ static int create_strip_zones (mddev_t *mddev)
 	 */
 	conf->nr_strip_zones = 0;
  
-	ITERATE_RDEV(mddev,rdev1,tmp1) {
+	rdev_for_each(rdev1, tmp1, mddev) {
 		printk("raid0: looking at %s\n",
 			bdevname(rdev1->bdev,b));
 		c = 0;
-		ITERATE_RDEV(mddev,rdev2,tmp2) {
+		rdev_for_each(rdev2, tmp2, mddev) {
 			printk("raid0:   comparing %s(%llu)",
 			       bdevname(rdev1->bdev,b),
 			       (unsigned long long)rdev1->size);
@@ -124,7 +124,7 @@ static int create_strip_zones (mddev_t *mddev)
 	cnt = 0;
 	smallest = NULL;
 	zone->dev = conf->devlist;
-	ITERATE_RDEV(mddev, rdev1, tmp1) {
+	rdev_for_each(rdev1, tmp1, mddev) {
 		int j = rdev1->raid_disk;
 
 		if (j < 0 || j >= mddev->raid_disks) {
@@ -293,7 +293,7 @@ static int raid0_run (mddev_t *mddev)
 
 	/* calculate array device size */
 	mddev->array_size = 0;
-	ITERATE_RDEV(mddev,rdev,tmp)
+	rdev_for_each(rdev, tmp, mddev)
 		mddev->array_size += rdev->size;
 
 	printk("raid0 : md_size is %llu blocks.\n", 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ae7c15207df5..5c7fef091cec 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1887,7 +1887,7 @@ static int run(mddev_t *mddev)
 	if (!conf->r1bio_pool)
 		goto out_no_mem;
 
-	ITERATE_RDEV(mddev, rdev, tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d6f12882424d..017f58113c33 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2026,7 +2026,7 @@ static int run(mddev_t *mddev)
 		goto out_free_conf;
 	}
 
-	ITERATE_RDEV(mddev, rdev, tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e946de6f46bc..63bfb0757829 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4136,7 +4136,7 @@ static int run(mddev_t *mddev)
 
 	pr_debug("raid5: run(%s) called.\n", mdname(mddev));
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		raid_disk = rdev->raid_disk;
 		if (raid_disk >= conf->raid_disks
 		    || raid_disk < 0)
@@ -4549,7 +4549,7 @@ static int raid5_start_reshape(mddev_t *mddev)
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return -EBUSY;
 
-	ITERATE_RDEV(mddev, rdev, rtmp)
+	rdev_for_each(rdev, rtmp, mddev)
 		if (rdev->raid_disk < 0 &&
 		    !test_bit(Faulty, &rdev->flags))
 			spares++;
@@ -4571,7 +4571,7 @@ static int raid5_start_reshape(mddev_t *mddev)
 	/* Add some new drives, as many as will fit.
 	 * We know there are enough to make the newly sized array work.
 	 */
-	ITERATE_RDEV(mddev, rdev, rtmp)
+	rdev_for_each(rdev, rtmp, mddev)
 		if (rdev->raid_disk < 0 &&
 		    !test_bit(Faulty, &rdev->flags)) {
 			if (raid5_add_disk(mddev, rdev)) {
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 5b2102e40286..9c19555f314b 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -322,7 +322,7 @@ static inline char * mdname (mddev_t * mddev)
 /*
  * iterates through the 'same array disks' ringlist
  */
-#define ITERATE_RDEV(mddev,rdev,tmp)					\
+#define rdev_for_each(rdev, tmp, mddev)				\
 	ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp)
 
 /*
-- 
cgit 


From 73c34431c7119d0bc7d3436abfad75fe47b2c51f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 6 Feb 2008 01:39:59 -0800
Subject: md: change ITERATE_RDEV_GENERIC to rdev_for_each_list, and remove
 ITERATE_RDEV_PENDING.

Finish ITERATE_ to for_each conversion.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c           |  8 ++++----
 include/linux/raid/md_k.h | 14 ++++----------
 2 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index e02022864f08..5fc326d3970e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3766,7 +3766,7 @@ static void autorun_devices(int part)
 		printk(KERN_INFO "md: considering %s ...\n",
 			bdevname(rdev0->bdev,b));
 		INIT_LIST_HEAD(&candidates);
-		ITERATE_RDEV_PENDING(rdev,tmp)
+		rdev_for_each_list(rdev, tmp, pending_raid_disks)
 			if (super_90_load(rdev, rdev0, 0) >= 0) {
 				printk(KERN_INFO "md:  adding %s ...\n",
 					bdevname(rdev->bdev,b));
@@ -3810,7 +3810,7 @@ static void autorun_devices(int part)
 		} else {
 			printk(KERN_INFO "md: created %s\n", mdname(mddev));
 			mddev->persistent = 1;
-			ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
+			rdev_for_each_list(rdev, tmp, candidates) {
 				list_del_init(&rdev->same_set);
 				if (bind_rdev_to_array(rdev, mddev))
 					export_rdev(rdev);
@@ -3821,7 +3821,7 @@ static void autorun_devices(int part)
 		/* on success, candidates will be empty, on error
 		 * it won't...
 		 */
-		ITERATE_RDEV_GENERIC(candidates,rdev,tmp)
+		rdev_for_each_list(rdev, tmp, candidates)
 			export_rdev(rdev);
 		mddev_put(mddev);
 	}
@@ -4936,7 +4936,7 @@ static void status_unused(struct seq_file *seq)
 
 	seq_printf(seq, "unused devices: ");
 
-	ITERATE_RDEV_PENDING(rdev,tmp) {
+	rdev_for_each_list(rdev, tmp, pending_raid_disks) {
 		char b[BDEVNAME_SIZE];
 		i++;
 		seq_printf(seq, "%s ",
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 9c19555f314b..85a068bab625 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -313,23 +313,17 @@ static inline char * mdname (mddev_t * mddev)
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
  */
-#define ITERATE_RDEV_GENERIC(head,rdev,tmp)				\
+#define rdev_for_each_list(rdev, tmp, list)				\
 									\
-	for ((tmp) = (head).next;					\
+	for ((tmp) = (list).next;					\
 		(rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),	\
-			(tmp) = (tmp)->next, (tmp)->prev != &(head)	\
+			(tmp) = (tmp)->next, (tmp)->prev != &(list)	\
 		; )
 /*
  * iterates through the 'same array disks' ringlist
  */
 #define rdev_for_each(rdev, tmp, mddev)				\
-	ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp)
-
-/*
- * Iterates through 'pending RAID disks'
- */
-#define ITERATE_RDEV_PENDING(rdev,tmp)					\
-	ITERATE_RDEV_GENERIC(pending_raid_disks,rdev,tmp)
+	rdev_for_each_list(rdev, tmp, (mddev)->disks)
 
 typedef struct mdk_thread_s {
 	void			(*run) (mddev_t *mddev);
-- 
cgit 


From b3bd86e2fdce01d6b49271a553d2a18b3e0510f3 Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Wed, 6 Feb 2008 01:40:04 -0800
Subject: isapnp driver semaphore to mutex

Changed the isapnp semaphore to a mutex.

[akpm@linux-foundation.org: no externs-in-c]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Daniel Walker <dwalker@mvista.com>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pnp/interface.c | 13 +++++++------
 drivers/pnp/manager.c   | 19 ++++++++++---------
 include/linux/pnp.h     |  1 +
 3 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c
index 31548044fdde..982658477a58 100644
--- a/drivers/pnp/interface.c
+++ b/drivers/pnp/interface.c
@@ -10,9 +10,12 @@
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/types.h>
+#include <linux/pnp.h>
 #include <linux/stat.h>
 #include <linux/ctype.h>
 #include <linux/slab.h>
+#include <linux/mutex.h>
+
 #include <asm/uaccess.h>
 
 #include "base.h"
@@ -315,8 +318,6 @@ static ssize_t pnp_show_current_resources(struct device *dmdev,
 	return ret;
 }
 
-extern struct semaphore pnp_res_mutex;
-
 static ssize_t
 pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
 			  const char *ubuf, size_t count)
@@ -361,10 +362,10 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
 		goto done;
 	}
 	if (!strnicmp(buf, "get", 3)) {
-		down(&pnp_res_mutex);
+		mutex_lock(&pnp_res_mutex);
 		if (pnp_can_read(dev))
 			dev->protocol->get(dev, &dev->res);
-		up(&pnp_res_mutex);
+		mutex_unlock(&pnp_res_mutex);
 		goto done;
 	}
 	if (!strnicmp(buf, "set", 3)) {
@@ -373,7 +374,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
 			goto done;
 		buf += 3;
 		pnp_init_resource_table(&dev->res);
-		down(&pnp_res_mutex);
+		mutex_lock(&pnp_res_mutex);
 		while (1) {
 			while (isspace(*buf))
 				++buf;
@@ -455,7 +456,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
 			}
 			break;
 		}
-		up(&pnp_res_mutex);
+		mutex_unlock(&pnp_res_mutex);
 		goto done;
 	}
 
diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c
index 6f8f8ed95c67..c28caf272c11 100644
--- a/drivers/pnp/manager.c
+++ b/drivers/pnp/manager.c
@@ -12,9 +12,10 @@
 #include <linux/pnp.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
+#include <linux/mutex.h>
 #include "base.h"
 
-DECLARE_MUTEX(pnp_res_mutex);
+DEFINE_MUTEX(pnp_res_mutex);
 
 static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx)
 {
@@ -297,7 +298,7 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
 	if (!pnp_can_configure(dev))
 		return -ENODEV;
 
-	down(&pnp_res_mutex);
+	mutex_lock(&pnp_res_mutex);
 	pnp_clean_resource_table(&dev->res);	/* start with a fresh slate */
 	if (dev->independent) {
 		port = dev->independent->port;
@@ -366,12 +367,12 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
 	} else if (dev->dependent)
 		goto fail;
 
-	up(&pnp_res_mutex);
+	mutex_unlock(&pnp_res_mutex);
 	return 1;
 
 fail:
 	pnp_clean_resource_table(&dev->res);
-	up(&pnp_res_mutex);
+	mutex_unlock(&pnp_res_mutex);
 	return 0;
 }
 
@@ -396,7 +397,7 @@ int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res,
 		return -ENOMEM;
 	*bak = dev->res;
 
-	down(&pnp_res_mutex);
+	mutex_lock(&pnp_res_mutex);
 	dev->res = *res;
 	if (!(mode & PNP_CONFIG_FORCE)) {
 		for (i = 0; i < PNP_MAX_PORT; i++) {
@@ -416,14 +417,14 @@ int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res,
 				goto fail;
 		}
 	}
-	up(&pnp_res_mutex);
+	mutex_unlock(&pnp_res_mutex);
 
 	kfree(bak);
 	return 0;
 
 fail:
 	dev->res = *bak;
-	up(&pnp_res_mutex);
+	mutex_unlock(&pnp_res_mutex);
 	kfree(bak);
 	return -EINVAL;
 }
@@ -547,9 +548,9 @@ int pnp_disable_dev(struct pnp_dev *dev)
 	dev->active = 0;
 
 	/* release the resources so that other devices can use them */
-	down(&pnp_res_mutex);
+	mutex_lock(&pnp_res_mutex);
 	pnp_clean_resource_table(&dev->res);
-	up(&pnp_res_mutex);
+	mutex_unlock(&pnp_res_mutex);
 
 	return 0;
 }
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index b9339d8b95bc..cd6332b88829 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -258,6 +258,7 @@ extern struct pnp_protocol isapnp_protocol;
 #else
 #define pnp_device_is_isapnp(dev) 0
 #endif
+extern struct mutex pnp_res_mutex;
 
 #ifdef CONFIG_PNPBIOS
 extern struct pnp_protocol pnpbios_protocol;
-- 
cgit 


From 533083836fd55ca67ce35ab3d914b74ec1a5b9a8 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 6 Feb 2008 01:40:12 -0800
Subject: make jbd/journal.c:__journal_abort_hard() static

__journal_abort_hard() can now become static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd/journal.c    | 2 +-
 include/linux/jbd.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 5d14243499d4..3943a8905eb2 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1457,7 +1457,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
  * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
  * and don't attempt to make any other journal updates.
  */
-void __journal_abort_hard(journal_t *journal)
+static void __journal_abort_hard(journal_t *journal)
 {
 	transaction_t *transaction;
 	char b[BDEVNAME_SIZE];
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index d9ecd13393b0..59b94c3e6c24 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -924,7 +924,6 @@ extern int	   journal_recover    (journal_t *journal);
 extern int	   journal_wipe       (journal_t *, int);
 extern int	   journal_skip_recovery	(journal_t *);
 extern void	   journal_update_superblock	(journal_t *, int);
-extern void	   __journal_abort_hard	(journal_t *);
 extern void	   journal_abort      (journal_t *, int);
 extern int	   journal_errno      (journal_t *);
 extern void	   journal_ack_err    (journal_t *);
-- 
cgit 


From d8fd66aaea7fe3e4f1ea044a563f129e3b9f05ff Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Wed, 6 Feb 2008 01:40:19 -0800
Subject: jbd.h: hide kernel only code

Move a few kernel-only things into __KERNEL__.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/jbd.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 59b94c3e6c24..b18fd3b9b835 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -33,7 +33,6 @@
 #include <linux/lockdep.h>
 
 #include <asm/semaphore.h>
-#endif
 
 #define journal_oom_retry 1
 
@@ -84,7 +83,6 @@ static inline void jbd_free(void *ptr, size_t size)
 
 #define JFS_MIN_JOURNAL_BLOCKS 1024
 
-#ifdef __KERNEL__
 
 /**
  * typedef handle_t - The handle_t type represents a single atomic update being performed by some process.
-- 
cgit 


From b5556a67f08559b6c1597f6396c1f9ef460f62b4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 6 Feb 2008 22:39:44 +0100
Subject: cpuidle: dubious one-bit signed bitfield in cpuidle.h

fix these sparse warnings:

  CHECK   arch/x86/kernel/acpi/cstate.c
include/linux/cpuidle.h:82:17: error: dubious one-bit signed bitfield
  CHECK   arch/x86/kernel/acpi/processor.c
include/linux/cpuidle.h:82:17: error: dubious one-bit signed bitfield
  CHECK   arch/x86/kernel/cpu/cpufreq/powernow-k7.c
include/linux/cpuidle.h:82:17: error: dubious one-bit signed bitfield
  CHECK   arch/x86/kernel/cpu/cpufreq/powernow-k8.c
include/linux/cpuidle.h:82:17: error: dubious one-bit signed bitfield
  CHECK   arch/x86/kernel/cpu/cpufreq/longhaul.c
include/linux/cpuidle.h:82:17: error: dubious one-bit signed bitfield
  CHECK   arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
include/linux/cpuidle.h:82:17: error: dubious one-bit signed bitfield

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cpuidle.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index c4e00161a247..b0fd85ab9efb 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -79,7 +79,7 @@ struct cpuidle_state_kobj {
 };
 
 struct cpuidle_device {
-	int			enabled:1;
+	unsigned int		enabled:1;
 	unsigned int		cpu;
 
 	int			last_residency;
-- 
cgit 


From 9f9975a55dbcd82ff4a222691a6dcd7b3145b9c0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Feb 2008 22:39:45 +0100
Subject: generic: add __FINITDATA

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 90cdbbbbe077..a404a0055dd7 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -110,6 +110,7 @@
 #define __FINIT		.previous
 
 #define __INITDATA	.section	".init.data","aw"
+#define __FINITDATA	.previous
 
 #define __DEVINIT        .section	".devinit.text", "ax"
 #define __DEVINITDATA    .section	".devinit.data", "aw"
-- 
cgit 


From 6ed31e92e94830c138fbd470486383380710069a Mon Sep 17 00:00:00 2001
From: Éric Piel <Eric.Piel@tremplin-utc.net>
Date: Tue, 5 Feb 2008 00:04:50 +0100
Subject: ACPI: Taint kernel on ACPI table override (format corrected)

When an ACPI table is overridden (for now this can happen only for DSDT)
display a big warning and taint the kernel with flag A.

Signed-off-by: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c     | 7 +++++++
 include/linux/kernel.h | 1 +
 kernel/panic.c         | 5 +++--
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 131936e7ff17..bbd8360bfb23 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -392,6 +392,13 @@ acpi_os_table_override(struct acpi_table_header * existing_table,
 			*new_table = initrd_table;
 	}
 #endif
+	if (*new_table != NULL) {
+		printk(KERN_WARNING PREFIX "Override [%4.4s-%8.8s], "
+			   "this is unsafe: tainting kernel\n",
+		       existing_table->signature,
+		       existing_table->oem_table_id);
+		add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
+	}
 	return AE_OK;
 }
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 18222f267bc4..9e01f376840a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -243,6 +243,7 @@ extern enum system_states {
 #define TAINT_BAD_PAGE			(1<<5)
 #define TAINT_USER			(1<<6)
 #define TAINT_DIE			(1<<7)
+#define TAINT_OVERRIDDEN_ACPI_TABLE	(1<<8)
 
 extern void dump_stack(void) __cold;
 
diff --git a/kernel/panic.c b/kernel/panic.c
index d9e90cfe3298..24af9f8bac99 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -161,7 +161,7 @@ const char *print_tainted(void)
 {
 	static char buf[20];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c",
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
 			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
@@ -169,7 +169,8 @@ const char *print_tainted(void)
 			tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
 			tainted & TAINT_BAD_PAGE ? 'B' : ' ',
 			tainted & TAINT_USER ? 'U' : ' ',
-			tainted & TAINT_DIE ? 'D' : ' ');
+			tainted & TAINT_DIE ? 'D' : ' ',
+			tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ');
 	}
 	else
 		snprintf(buf, sizeof(buf), "Not tainted");
-- 
cgit 


From 5229e87d59cef33539322948bd8e3b5a537f7c97 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Feb 2008 01:26:55 -0500
Subject: ACPI: create /sys/firmware/acpi/interrupts

See Documentation/ABI/testing/sysfs-firmware-acpi

Based-on-original-patch-by: Luming Yu <luming.yu@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/ABI/testing/sysfs-firmware-acpi |  99 ++++++++++++
 drivers/acpi/events/evevent.c                 |   2 +-
 drivers/acpi/events/evgpe.c                   |   2 +-
 drivers/acpi/osl.c                            |  12 +-
 drivers/acpi/system.c                         | 208 ++++++++++++++++++++++++++
 drivers/acpi/utilities/utglobal.c             |   2 -
 include/acpi/acglobal.h                       |   4 -
 include/acpi/acpiosxf.h                       |   3 +
 include/linux/acpi.h                          |   2 +
 9 files changed, 325 insertions(+), 9 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-acpi

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi
new file mode 100644
index 000000000000..9470ed9afcc0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -0,0 +1,99 @@
+What:		/sys/firmware/acpi/interrupts/
+Date:		February 2008
+Contact:	Len Brown <lenb@kernel.org>
+Description:
+		All ACPI interrupts are handled via a single IRQ,
+		the System Control Interrupt (SCI), which appears
+		as "acpi" in /proc/interrupts.
+
+		However, one of the main functions of ACPI is to make
+		the platform understand random hardware without
+		special driver support.  So while the SCI handles a few
+		well known (fixed feature) interrupts sources, such
+		as the power button, it can also handle a variable
+		number of a "General Purpose Events" (GPE).
+
+		A GPE vectors to a specified handler in AML, which
+		can do a anything the BIOS writer wants from
+		OS context.  GPE 0x12, for example, would vector
+		to a level or edge handler called _L12 or _E12.
+		The handler may do its business and return.
+		Or the handler may send send a Notify event
+		to a Linux device driver registered on an ACPI device,
+		such as a battery, or a processor.
+
+		To figure out where all the SCI's are coming from,
+		/sys/firmware/acpi/interrupts contains a file listing
+		every possible source, and the count of how many
+		times it has triggered.
+
+		$ cd /sys/firmware/acpi/interrupts
+		$ grep . *
+		error:0
+		ff_gbl_lock:0
+		ff_pmtimer:0
+		ff_pwr_btn:0
+		ff_rt_clk:0
+		ff_slp_btn:0
+		gpe00:0
+		gpe01:0
+		gpe02:0
+		gpe03:0
+		gpe04:0
+		gpe05:0
+		gpe06:0
+		gpe07:0
+		gpe08:0
+		gpe09:174
+		gpe0A:0
+		gpe0B:0
+		gpe0C:0
+		gpe0D:0
+		gpe0E:0
+		gpe0F:0
+		gpe10:0
+		gpe11:60
+		gpe12:0
+		gpe13:0
+		gpe14:0
+		gpe15:0
+		gpe16:0
+		gpe17:0
+		gpe18:0
+		gpe19:7
+		gpe1A:0
+		gpe1B:0
+		gpe1C:0
+		gpe1D:0
+		gpe1E:0
+		gpe1F:0
+		gpe_all:241
+		sci:241
+
+		sci - The total number of times the ACPI SCI
+		has claimed an interrupt.
+
+		gpe_all - count of SCI caused by GPEs.
+
+		gpeXX - count for individual GPE source
+
+		ff_gbl_lock - Global Lock
+
+		ff_pmtimer - PM Timer
+
+		ff_pwr_btn - Power Button
+
+		ff_rt_clk - Real Time Clock
+
+		ff_slp_btn - Sleep Button
+
+		error - an interrupt that can't be accounted for above.
+
+		Root has permission to clear any of these counters.  Eg.
+		# echo 0 > gpe11
+
+		All counters can be cleared by clearing the total "sci":
+		# echo 0 > sci
+
+		None of these counters has an effect on the function
+		of the system, they are simply statistics.
diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/events/evevent.c
index e41287815ea1..3048801a37b5 100644
--- a/drivers/acpi/events/evevent.c
+++ b/drivers/acpi/events/evevent.c
@@ -259,7 +259,7 @@ u32 acpi_ev_fixed_event_detect(void)
 			enable_bit_mask)) {
 
 			/* Found an active (signalled) event */
-
+			acpi_os_fixed_event_count(i);
 			int_status |= acpi_ev_fixed_event_dispatch((u32) i);
 		}
 	}
diff --git a/drivers/acpi/events/evgpe.c b/drivers/acpi/events/evgpe.c
index e22f4a973c0f..4bd9e2291bd9 100644
--- a/drivers/acpi/events/evgpe.c
+++ b/drivers/acpi/events/evgpe.c
@@ -618,7 +618,7 @@ acpi_ev_gpe_dispatch(struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number)
 
 	ACPI_FUNCTION_TRACE(ev_gpe_dispatch);
 
-	acpi_gpe_count++;
+	acpi_os_gpe_count(gpe_number);
 
 	/*
 	 * If edge-triggered, clear the GPE status bit now.  Note that
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index e53fb516f9d4..1087efeca9b1 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -332,7 +332,15 @@ acpi_os_table_override(struct acpi_table_header * existing_table,
 
 static irqreturn_t acpi_irq(int irq, void *dev_id)
 {
-	return (*acpi_irq_handler) (acpi_irq_context) ? IRQ_HANDLED : IRQ_NONE;
+	u32 handled;
+
+	handled = (*acpi_irq_handler) (acpi_irq_context);
+
+	if (handled) {
+		acpi_irq_handled++;
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
 }
 
 acpi_status
@@ -341,6 +349,8 @@ acpi_os_install_interrupt_handler(u32 gsi, acpi_osd_handler handler,
 {
 	unsigned int irq;
 
+	acpi_irq_stats_init();
+
 	/*
 	 * Ignore the GSI from the core, and use the value in our copy of the
 	 * FADT. It may not be the same if an interrupt source override exists
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index 5ffe0ea18967..ce881713f7a6 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -40,6 +40,8 @@ ACPI_MODULE_NAME("system");
 #define ACPI_SYSTEM_CLASS		"system"
 #define ACPI_SYSTEM_DEVICE_NAME		"System"
 
+u32 acpi_irq_handled;
+
 /*
  * Make ACPICA version work as module param
  */
@@ -166,6 +168,212 @@ static int acpi_system_sysfs_init(void)
 	return 0;
 }
 
+/*
+ * Detailed ACPI IRQ counters in /sys/firmware/acpi/interrupts/
+ * See Documentation/ABI/testing/sysfs-firmware-acpi
+ */
+
+#define COUNT_GPE 0
+#define COUNT_SCI 1	/* acpi_irq_handled */
+#define COUNT_ERROR 2	/* other */
+#define NUM_COUNTERS_EXTRA 3
+
+static u32 *all_counters;
+static u32 num_gpes;
+static u32 num_counters;
+static struct attribute **all_attrs;
+static u32 acpi_gpe_count;
+
+static struct attribute_group interrupt_stats_attr_group = {
+	.name = "interrupts",
+};
+static struct kobj_attribute *counter_attrs;
+
+static int count_num_gpes(void)
+{
+	int count = 0;
+	struct acpi_gpe_xrupt_info *gpe_xrupt_info;
+	struct acpi_gpe_block_info *gpe_block;
+	acpi_cpu_flags flags;
+
+	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
+
+	gpe_xrupt_info = acpi_gbl_gpe_xrupt_list_head;
+	while (gpe_xrupt_info) {
+		gpe_block = gpe_xrupt_info->gpe_block_list_head;
+		while (gpe_block) {
+			count += gpe_block->register_count *
+			    ACPI_GPE_REGISTER_WIDTH;
+			gpe_block = gpe_block->next;
+		}
+		gpe_xrupt_info = gpe_xrupt_info->next;
+	}
+	acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
+
+	return count;
+}
+
+static void delete_gpe_attr_array(void)
+{
+	u32 *tmp = all_counters;
+
+	all_counters = NULL;
+	kfree(tmp);
+
+	if (counter_attrs) {
+		int i;
+
+		for (i = 0; i < num_gpes; i++)
+			kfree(counter_attrs[i].attr.name);
+
+		kfree(counter_attrs);
+	}
+	kfree(all_attrs);
+
+	return;
+}
+
+void acpi_os_gpe_count(u32 gpe_number)
+{
+	acpi_gpe_count++;
+
+	if (!all_counters)
+		return;
+
+	if (gpe_number < num_gpes)
+		all_counters[gpe_number]++;
+	else
+		all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++;
+
+	return;
+}
+
+void acpi_os_fixed_event_count(u32 event_number)
+{
+	if (!all_counters)
+		return;
+
+	if (event_number < ACPI_NUM_FIXED_EVENTS)
+		all_counters[num_gpes + event_number]++;
+	else
+		all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++;
+
+	return;
+}
+
+static ssize_t counter_show(struct kobject *kobj,
+	struct kobj_attribute *attr, char *buf)
+{
+	all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI] =
+		acpi_irq_handled;
+	all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE] =
+		acpi_gpe_count;
+
+	return sprintf(buf, "%d\n", all_counters[attr - counter_attrs]);
+}
+
+/*
+ * counter_set() sets the specified counter.
+ * setting the total "sci" file to any value clears all counters.
+ */
+static ssize_t counter_set(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t size)
+{
+	int index = attr - counter_attrs;
+
+	if (index == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI) {
+		int i;
+		for (i = 0; i < num_counters; ++i)
+			all_counters[i] = 0;
+		acpi_gpe_count = 0;
+		acpi_irq_handled = 0;
+
+	} else
+		all_counters[index] = strtoul(buf, NULL, 0);
+
+	return size;
+}
+
+void acpi_irq_stats_init(void)
+{
+	int i;
+
+	if (all_counters)
+		return;
+
+	num_gpes = count_num_gpes();
+	num_counters = num_gpes + ACPI_NUM_FIXED_EVENTS + NUM_COUNTERS_EXTRA;
+
+	all_attrs = kzalloc(sizeof(struct attribute *) * (num_counters + 1),
+			GFP_KERNEL);
+	if (all_attrs == NULL)
+		return;
+
+	all_counters = kzalloc(sizeof(u32) * (num_counters), GFP_KERNEL);
+	if (all_counters == NULL)
+		goto fail;
+
+	counter_attrs = kzalloc(sizeof(struct kobj_attribute) * (num_counters),
+			GFP_KERNEL);
+	if (counter_attrs == NULL)
+		goto fail;
+
+	for (i = 0; i < num_counters; ++i) {
+		char buffer[10];
+		char *name;
+
+		if (i < num_gpes)
+			sprintf(buffer, "gpe%02X", i);
+		else if (i == num_gpes + ACPI_EVENT_PMTIMER)
+			sprintf(buffer, "ff_pmtimer");
+		else if (i == num_gpes + ACPI_EVENT_GLOBAL)
+			sprintf(buffer, "ff_gbl_lock");
+		else if (i == num_gpes + ACPI_EVENT_POWER_BUTTON)
+			sprintf(buffer, "ff_pwr_btn");
+		else if (i == num_gpes + ACPI_EVENT_SLEEP_BUTTON)
+			sprintf(buffer, "ff_slp_btn");
+		else if (i == num_gpes + ACPI_EVENT_RTC)
+			sprintf(buffer, "ff_rt_clk");
+		else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE)
+			sprintf(buffer, "gpe_all");
+		else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI)
+			sprintf(buffer, "sci");
+		else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR)
+			sprintf(buffer, "error");
+		else
+			sprintf(buffer, "bug%02X", i);
+
+		name = kzalloc(strlen(buffer) + 1, GFP_KERNEL);
+		if (name == NULL)
+			goto fail;
+		strncpy(name, buffer, strlen(buffer) + 1);
+
+		counter_attrs[i].attr.name = name;
+		counter_attrs[i].attr.mode = 0644;
+		counter_attrs[i].show = counter_show;
+		counter_attrs[i].store = counter_set;
+
+		all_attrs[i] = &counter_attrs[i].attr;
+	}
+
+	interrupt_stats_attr_group.attrs = all_attrs;
+	sysfs_create_group(acpi_kobj, &interrupt_stats_attr_group);
+	return;
+
+fail:
+	delete_gpe_attr_array();
+	return;
+}
+
+static void __exit interrupt_stats_exit(void)
+{
+	sysfs_remove_group(acpi_kobj, &interrupt_stats_attr_group);
+
+	delete_gpe_attr_array();
+
+	return;
+}
+
 /* --------------------------------------------------------------------------
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
diff --git a/drivers/acpi/utilities/utglobal.c b/drivers/acpi/utilities/utglobal.c
index 93ea8290b4f7..630c9a2c5b7b 100644
--- a/drivers/acpi/utilities/utglobal.c
+++ b/drivers/acpi/utilities/utglobal.c
@@ -671,7 +671,6 @@ void acpi_ut_init_globals(void)
 
 	/* GPE support */
 
-	acpi_gpe_count = 0;
 	acpi_gbl_gpe_xrupt_list_head = NULL;
 	acpi_gbl_gpe_fadt_blocks[0] = NULL;
 	acpi_gbl_gpe_fadt_blocks[1] = NULL;
@@ -735,4 +734,3 @@ void acpi_ut_init_globals(void)
 
 ACPI_EXPORT_SYMBOL(acpi_dbg_level)
     ACPI_EXPORT_SYMBOL(acpi_dbg_layer)
-    ACPI_EXPORT_SYMBOL(acpi_gpe_count)
diff --git a/include/acpi/acglobal.h b/include/acpi/acglobal.h
index 347a911d8237..47a1fd8f2d8a 100644
--- a/include/acpi/acglobal.h
+++ b/include/acpi/acglobal.h
@@ -117,10 +117,6 @@ extern u32 acpi_dbg_layer;
 
 extern u32 acpi_gbl_nesting_level;
 
-/* Event counters */
-
-ACPI_EXTERN u32 acpi_gpe_count;
-
 /* Support for dynamic control method tracing mechanism */
 
 ACPI_EXTERN u32 acpi_gbl_original_dbg_level;
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index ca882b8e7d10..1a16cfbe9e0d 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -181,6 +181,9 @@ acpi_os_install_interrupt_handler(u32 gsi,
 acpi_status
 acpi_os_remove_interrupt_handler(u32 gsi, acpi_osd_handler service_routine);
 
+void acpi_os_gpe_count(u32 gpe_number);
+void acpi_os_fixed_event_count(u32 fixed_event_number);
+
 /*
  * Threads and Scheduling
  */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 63f2e6ed698f..cb911f3e40f5 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -115,7 +115,9 @@ int acpi_unmap_lsapic(int cpu);
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base);
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base);
+void acpi_irq_stats_init(void);
 
+extern u32 acpi_irq_handled;
 extern int acpi_mp_config;
 
 extern struct acpi_mcfg_allocation *pci_mmcfg_config;
-- 
cgit 


From 9e76988e9390a4ff4d171f690586d0c58186b47e Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Fri, 26 Oct 2007 10:18:21 -0700
Subject: [CPUFREQ] Eliminate cpufreq_userspace scaling_setspeed deadlock

Eliminate cpufreq_userspace scaling_setspeed deadlock.

Luming Yu recently uncovered yet another cpufreq related deadlock.
One thread that continuously switches the governors and the other thread that
repeatedly cats the contents of cpufreq directory causes both these threads to
go into a deadlock.

Detailed examination of the deadlock showed the exact flow before the deadlock
as:

Thread 1			Thread 2
________			________
				cats files under /sys/devices/.../cpufreq/
Set governor to userspace
  Adds a new sysfs entry for
  scaling_setspeed
				cats files under /sys/devices/.../cpufreq/

Set governor to performance
  Holds cpufreq_rw_sem in write
  mode
  Sends a STOP notify to
  userspace governor
				cat /sys/devices/.../cpufreq/scaling_setspeed
				  Gets a handle on the above sysfs entry with
				  sysfs_get_active
				  Blocks while trying to get cpufreq_rw_sem
				  in read mode
  Remove a sysfs entry for
  scaling_setspeed
    Blocks on sysfs_deactivate
    while waiting for earlier
    get_active (on other thread)
    to drain

At this point both threads go into deadlock and any other thread that tries to
do anything with sysfs cpufreq will also block.

There seems to be no easy way to avoid this deadlock as long as
cpufreq_userspace adds/removes the sysfs entry under same kobject as cpufreq.
Below patch moves scaling_setspeed to cpufreq.c, keeping it always and calling
back the governor on read/write. This is the cleanest fix I could think of,
even though adding two callbacks in governor structure just for this seems
unnecessary.

Note that the change makes scaling_setspeed under /sys/.../cpufreq permanent
and returns <unsupported> when governor is not userspace.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq.c           | 27 +++++++++++++++++++++++++
 drivers/cpufreq/cpufreq_userspace.c | 40 +++++++------------------------------
 include/linux/cpufreq.h             |  4 ++++
 3 files changed, 38 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 613314851ecc..64926aa990db 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -601,6 +601,31 @@ static ssize_t show_affected_cpus (struct cpufreq_policy * policy, char *buf)
 	return i;
 }
 
+static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
+		const char *buf, size_t count)
+{
+	unsigned int freq = 0;
+	unsigned int ret;
+
+	if (!policy->governor->store_setspeed)
+		return -EINVAL;
+
+	ret = sscanf(buf, "%u", &freq);
+	if (ret != 1)
+		return -EINVAL;
+
+	policy->governor->store_setspeed(policy, freq);
+
+	return count;
+}
+
+static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
+{
+	if (!policy->governor->show_setspeed)
+		return sprintf(buf, "<unsupported>\n");
+
+	return policy->governor->show_setspeed(policy, buf);
+}
 
 #define define_one_ro(_name) \
 static struct freq_attr _name = \
@@ -624,6 +649,7 @@ define_one_ro(affected_cpus);
 define_one_rw(scaling_min_freq);
 define_one_rw(scaling_max_freq);
 define_one_rw(scaling_governor);
+define_one_rw(scaling_setspeed);
 
 static struct attribute * default_attrs[] = {
 	&cpuinfo_min_freq.attr,
@@ -634,6 +660,7 @@ static struct attribute * default_attrs[] = {
 	&scaling_governor.attr,
 	&scaling_driver.attr,
 	&scaling_available_governors.attr,
+	&scaling_setspeed.attr,
 	NULL
 };
 
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index f8cdde4bf6cd..cb2ac01a41a1 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -65,12 +65,12 @@ static struct notifier_block userspace_cpufreq_notifier_block = {
 
 /**
  * cpufreq_set - set the CPU frequency
+ * @policy: pointer to policy struct where freq is being set
  * @freq: target frequency in kHz
- * @cpu: CPU for which the frequency is to be set
  *
  * Sets the CPU frequency to freq.
  */
-static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy)
+static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
 {
 	int ret = -EINVAL;
 
@@ -102,34 +102,11 @@ static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy)
 }
 
 
-/************************** sysfs interface ************************/
-static ssize_t show_speed (struct cpufreq_policy *policy, char *buf)
+static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)
 {
-	return sprintf (buf, "%u\n", cpu_cur_freq[policy->cpu]);
+	return sprintf(buf, "%u\n", cpu_cur_freq[policy->cpu]);
 }
 
-static ssize_t
-store_speed (struct cpufreq_policy *policy, const char *buf, size_t count)
-{
-	unsigned int freq = 0;
-	unsigned int ret;
-
-	ret = sscanf (buf, "%u", &freq);
-	if (ret != 1)
-		return -EINVAL;
-
-	cpufreq_set(freq, policy);
-
-	return count;
-}
-
-static struct freq_attr freq_attr_scaling_setspeed =
-{
-	.attr = { .name = "scaling_setspeed", .mode = 0644 },
-	.show = show_speed,
-	.store = store_speed,
-};
-
 static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 				   unsigned int event)
 {
@@ -142,10 +119,6 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 			return -EINVAL;
 		BUG_ON(!policy->cur);
 		mutex_lock(&userspace_mutex);
-		rc = sysfs_create_file (&policy->kobj,
-					&freq_attr_scaling_setspeed.attr);
-		if (rc)
-			goto start_out;
 
 		if (cpus_using_userspace_governor == 0) {
 			cpufreq_register_notifier(
@@ -160,7 +133,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 		cpu_cur_freq[cpu] = policy->cur;
 		cpu_set_freq[cpu] = policy->cur;
 		dprintk("managing cpu %u started (%u - %u kHz, currently %u kHz)\n", cpu, cpu_min_freq[cpu], cpu_max_freq[cpu], cpu_cur_freq[cpu]);
-start_out:
+
 		mutex_unlock(&userspace_mutex);
 		break;
 	case CPUFREQ_GOV_STOP:
@@ -176,7 +149,6 @@ start_out:
 		cpu_min_freq[cpu] = 0;
 		cpu_max_freq[cpu] = 0;
 		cpu_set_freq[cpu] = 0;
-		sysfs_remove_file (&policy->kobj, &freq_attr_scaling_setspeed.attr);
 		dprintk("managing cpu %u stopped\n", cpu);
 		mutex_unlock(&userspace_mutex);
 		break;
@@ -211,6 +183,8 @@ start_out:
 struct cpufreq_governor cpufreq_gov_userspace = {
 	.name		= "userspace",
 	.governor	= cpufreq_governor_userspace,
+	.store_setspeed	= cpufreq_set,
+	.show_setspeed	= show_speed,
 	.owner		= THIS_MODULE,
 };
 EXPORT_SYMBOL(cpufreq_gov_userspace);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 23932d7741a9..ddd8652fc3f3 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -167,6 +167,10 @@ struct cpufreq_governor {
 	char	name[CPUFREQ_NAME_LEN];
 	int 	(*governor)	(struct cpufreq_policy *policy,
 				 unsigned int event);
+	ssize_t	(*show_setspeed)	(struct cpufreq_policy *policy,
+					 char *buf);
+	int 	(*store_setspeed)	(struct cpufreq_policy *policy,
+					 unsigned int freq);
 	unsigned int max_transition_latency; /* HW must be able to switch to
 			next freq faster than this value in nano secs or we
 			will fallback to performance governor */
-- 
cgit 


From 1c69fc2a9012e160c8d459f63df74a6b01db8322 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 6 Feb 2008 21:07:54 -0800
Subject: IB/mlx4: Consolidate code to get an entry from a struct mlx4_buf

We use struct mlx4_buf for kernel QP, CQ and SRQ buffers, and the code
to look up an entry is duplicated in get_cqe_from_buf() and the QP and
SRQ versions of get_wqe().  Factor this out into mlx4_buf_offset().

This will also make it easier to switch over to using vmap() for buffers.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/cq.c  | 8 +-------
 drivers/infiniband/hw/mlx4/qp.c  | 6 +-----
 drivers/infiniband/hw/mlx4/srq.c | 8 +-------
 include/linux/mlx4/device.h      | 8 ++++++++
 4 files changed, 11 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7950aa6e8184..8ac7b973f870 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -64,13 +64,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	int offset = n * sizeof (struct mlx4_cqe);
-
-	if (buf->buf.nbufs == 1)
-		return buf->buf.u.direct.buf + offset;
-	else
-		return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8cba9c532e64..376db730bc75 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -96,11 +96,7 @@ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 
 static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
 {
-	if (qp->buf.nbufs == 1)
-		return qp->buf.u.direct.buf + offset;
-	else
-		return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&qp->buf, offset);
 }
 
 static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index e7e9a3d0dac3..beaa3b06cf58 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -38,13 +38,7 @@
 
 static void *get_wqe(struct mlx4_ib_srq *srq, int n)
 {
-	int offset = n << srq->msrq.wqe_shift;
-
-	if (srq->buf.nbufs == 1)
-		return srq->buf.u.direct.buf + offset;
-	else
-		return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
 }
 
 static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 222815d91c40..a0afa7511a30 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -308,6 +308,14 @@ struct mlx4_init_port_param {
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
+static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
+{
+	if (buf->nbufs == 1)
+		return buf->u.direct.buf + offset;
+	else
+		return buf->u.page_list[offset >> PAGE_SHIFT].buf +
+			(offset & (PAGE_SIZE - 1));
+}
 
 int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
 void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
-- 
cgit 


From 313abe55a87bc10e55d00f337d609e17ad5f8c9a Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 28 Jan 2008 10:40:51 +0200
Subject: mlx4_core: For 64-bit systems, vmap() kernel queue buffers

Since kernel virtual memory is not a problem on 64-bit systems, there
is no reason to use our own 2-layer page mapping scheme for large
kernel queue buffers on such systems.  Instead, map the page list to a
single virtually contiguous buffer with vmap(), so that can we access
buffer memory via direct indexing.

Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/alloc.c    | 16 ++++++++++++++++
 include/linux/mlx4/device.h |  4 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index b226e019bc8b..2da2c2ec1f22 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -151,6 +151,19 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 
 			memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
 		}
+
+		if (BITS_PER_LONG == 64) {
+			struct page **pages;
+			pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
+			if (!pages)
+				goto err_free;
+			for (i = 0; i < buf->nbufs; ++i)
+				pages[i] = virt_to_page(buf->u.page_list[i].buf);
+			buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+			kfree(pages);
+			if (!buf->u.direct.buf)
+				goto err_free;
+		}
 	}
 
 	return 0;
@@ -170,6 +183,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 		dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
 				  buf->u.direct.map);
 	else {
+		if (BITS_PER_LONG == 64)
+			vunmap(buf->u.direct.buf);
+
 		for (i = 0; i < buf->nbufs; ++i)
 			if (buf->u.page_list[i].buf)
 				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a0afa7511a30..631607788f83 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -189,7 +189,7 @@ struct mlx4_buf_list {
 };
 
 struct mlx4_buf {
-	union {
+	struct {
 		struct mlx4_buf_list	direct;
 		struct mlx4_buf_list   *page_list;
 	} u;
@@ -310,7 +310,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
-	if (buf->nbufs == 1)
+	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
 		return buf->u.direct.buf + offset;
 	else
 		return buf->u.page_list[offset >> PAGE_SHIFT].buf +
-- 
cgit 


From b57aacfa7a95328f469d0360e49289b023c47e9e Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 6 Feb 2008 21:17:59 -0800
Subject: mlx4_core: Clean up struct mlx4_buf

Now that struct mlx4_buf.u is a struct instead of a union because of
the vmap() changes, there's no point in having a struct at all.  So
move .direct and .page_list directly into struct mlx4_buf and get rid
of a bunch of unnecessary ".u"s.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/alloc.c    | 40 ++++++++++++++++++++--------------------
 drivers/net/mlx4/mr.c       |  4 ++--
 include/linux/mlx4/device.h | 10 ++++------
 3 files changed, 26 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index 2da2c2ec1f22..521dc0322ee4 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -116,40 +116,40 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->nbufs        = 1;
 		buf->npages       = 1;
 		buf->page_shift   = get_order(size) + PAGE_SHIFT;
-		buf->u.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+		buf->direct.buf   = dma_alloc_coherent(&dev->pdev->dev,
 						       size, &t, GFP_KERNEL);
-		if (!buf->u.direct.buf)
+		if (!buf->direct.buf)
 			return -ENOMEM;
 
-		buf->u.direct.map = t;
+		buf->direct.map = t;
 
 		while (t & ((1 << buf->page_shift) - 1)) {
 			--buf->page_shift;
 			buf->npages *= 2;
 		}
 
-		memset(buf->u.direct.buf, 0, size);
+		memset(buf->direct.buf, 0, size);
 	} else {
 		int i;
 
 		buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 		buf->npages      = buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
-		buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list,
+		buf->page_list   = kzalloc(buf->nbufs * sizeof *buf->page_list,
 					   GFP_KERNEL);
-		if (!buf->u.page_list)
+		if (!buf->page_list)
 			return -ENOMEM;
 
 		for (i = 0; i < buf->nbufs; ++i) {
-			buf->u.page_list[i].buf =
+			buf->page_list[i].buf =
 				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
 						   &t, GFP_KERNEL);
-			if (!buf->u.page_list[i].buf)
+			if (!buf->page_list[i].buf)
 				goto err_free;
 
-			buf->u.page_list[i].map = t;
+			buf->page_list[i].map = t;
 
-			memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
+			memset(buf->page_list[i].buf, 0, PAGE_SIZE);
 		}
 
 		if (BITS_PER_LONG == 64) {
@@ -158,10 +158,10 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 			if (!pages)
 				goto err_free;
 			for (i = 0; i < buf->nbufs; ++i)
-				pages[i] = virt_to_page(buf->u.page_list[i].buf);
-			buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+				pages[i] = virt_to_page(buf->page_list[i].buf);
+			buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
 			kfree(pages);
-			if (!buf->u.direct.buf)
+			if (!buf->direct.buf)
 				goto err_free;
 		}
 	}
@@ -180,18 +180,18 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 	int i;
 
 	if (buf->nbufs == 1)
-		dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
-				  buf->u.direct.map);
+		dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
+				  buf->direct.map);
 	else {
 		if (BITS_PER_LONG == 64)
-			vunmap(buf->u.direct.buf);
+			vunmap(buf->direct.buf);
 
 		for (i = 0; i < buf->nbufs; ++i)
-			if (buf->u.page_list[i].buf)
+			if (buf->page_list[i].buf)
 				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
-						  buf->u.page_list[i].buf,
-						  buf->u.page_list[i].map);
-		kfree(buf->u.page_list);
+						  buf->page_list[i].buf,
+						  buf->page_list[i].map);
+		kfree(buf->page_list);
 	}
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 9c9e308d0917..679dfdb6807f 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -419,9 +419,9 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 
 	for (i = 0; i < buf->npages; ++i)
 		if (buf->nbufs == 1)
-			page_list[i] = buf->u.direct.map + (i << buf->page_shift);
+			page_list[i] = buf->direct.map + (i << buf->page_shift);
 		else
-			page_list[i] = buf->u.page_list[i].map;
+			page_list[i] = buf->page_list[i].map;
 
 	err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 631607788f83..4210ac4a8bcd 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -189,10 +189,8 @@ struct mlx4_buf_list {
 };
 
 struct mlx4_buf {
-	struct {
-		struct mlx4_buf_list	direct;
-		struct mlx4_buf_list   *page_list;
-	} u;
+	struct mlx4_buf_list	direct;
+	struct mlx4_buf_list   *page_list;
 	int			nbufs;
 	int			npages;
 	int			page_shift;
@@ -311,9 +309,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
 	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
-		return buf->u.direct.buf + offset;
+		return buf->direct.buf + offset;
 	else
-		return buf->u.page_list[offset >> PAGE_SHIFT].buf +
+		return buf->page_list[offset >> PAGE_SHIFT].buf +
 			(offset & (PAGE_SIZE - 1));
 }
 
-- 
cgit 


From df92e695998e1bc6e426a840eb86d6d1ee87e2a5 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Mon, 4 Feb 2008 23:31:22 -0800
Subject: ACPI: track opregion names to avoid driver resource conflicts.

Small ACPICA extension to be able to store the name of operation regions in osl.c later

In ACPI, AML can define accesses to IO ports and System Memory by Operation
Regions.  Those are not registered as done by PNPACPI using resource templates
(and _CRS/_SRS methods).

The IO ports and System Memory regions may get accessed by arbitrary AML code.
 When native drivers are accessing the same resources bad things can happen
(e.g.  a critical shutdown temperature of 3000 C every 2 months or so).

It is not really possible to register the operation regions via
request_resource, as they often overlap with pnp or other resources (e.g.
statically setup IO resources below 0x100).

This approach stores all Operation Region declarations (IO and System Memory
only) at ACPI table parse time.  It offers a similar functionality like
request_region and let drivers which are known to possibly use the same IO
ports and Memory which are also often used by ACPI (hwmon and i2c) check for
ACPI interference.

A boot parameter acpi_enforce_resources=strict/lax/no is provided, which
is default set to lax:
  - strict: let conflicting drivers fail to load with an error message
  - lax:    let conflicting driver work normal with a warning message
  - no:     no functional change at all
Depending on the feedback and the kind of interferences we see, this
should be set to strict at later time.

Goal of this patch set is:
  - Identify ACPI interferences in bug reports (very hard to reproduce
    and to identify)
  - Find BIOSes for that an ACPI driver should exist for specific HW
    instead of a native one.
  - stability in general

Provide acpi_check_{mem_}region.

Drivers can additionally check against possible ACPI interference by also
invoking this shortly before they call request_region.
If -EBUSY is returned, the driver must not load.
Use acpi_enforce_resources=strict/lax/no options to:
  - strict: let conflicting drivers fail to load with an error message
  - lax:    let conflicting driver work normal with a warning message
  - no:     no functional change at all

Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/dispatcher/dsopcode.c |   4 +-
 drivers/acpi/osl.c                 | 175 ++++++++++++++++++++++++++++++++++++-
 include/acpi/acpiosxf.h            |   4 +-
 include/linux/acpi.h               |  17 ++++
 4 files changed, 195 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/dispatcher/dsopcode.c b/drivers/acpi/dispatcher/dsopcode.c
index fc9da4879cbf..f501e083aac7 100644
--- a/drivers/acpi/dispatcher/dsopcode.c
+++ b/drivers/acpi/dispatcher/dsopcode.c
@@ -359,7 +359,9 @@ acpi_status acpi_ds_get_region_arguments(union acpi_operand_object *obj_desc)
 
 	status = acpi_os_validate_address(obj_desc->region.space_id,
 					  obj_desc->region.address,
-					  (acpi_size) obj_desc->region.length);
+					  (acpi_size) obj_desc->region.length,
+					  acpi_ut_get_node_name(node));
+
 	if (ACPI_FAILURE(status)) {
 		/*
 		 * Invalid address/length. We will emit an error message and mark
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index e53fb516f9d4..222f7b1b66f7 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -44,6 +44,8 @@
 #include <asm/uaccess.h>
 
 #include <linux/efi.h>
+#include <linux/ioport.h>
+#include <linux/list.h>
 
 #define _COMPONENT		ACPI_OS_SERVICES
 ACPI_MODULE_NAME("osl");
@@ -74,6 +76,18 @@ static void *acpi_irq_context;
 static struct workqueue_struct *kacpid_wq;
 static struct workqueue_struct *kacpi_notify_wq;
 
+struct acpi_res_list {
+	resource_size_t start;
+	resource_size_t end;
+	acpi_adr_space_type resource_type; /* IO port, System memory, ...*/
+	char name[5];   /* only can have a length of 4 chars, make use of this
+			   one instead of res->name, no need to kalloc then */
+	struct list_head resource_list;
+};
+
+static LIST_HEAD(resource_list_head);
+static DEFINE_SPINLOCK(acpi_res_lock);
+
 #define	OSI_STRING_LENGTH_MAX 64	/* arbitrary */
 static char osi_additional_string[OSI_STRING_LENGTH_MAX];
 
@@ -1102,6 +1116,127 @@ static int __init acpi_wake_gpes_always_on_setup(char *str)
 
 __setup("acpi_wake_gpes_always_on", acpi_wake_gpes_always_on_setup);
 
+/* Check of resource interference between native drivers and ACPI
+ * OperationRegions (SystemIO and System Memory only).
+ * IO ports and memory declared in ACPI might be used by the ACPI subsystem
+ * in arbitrary AML code and can interfere with legacy drivers.
+ * acpi_enforce_resources= can be set to:
+ *
+ *   - strict           (2)
+ *     -> further driver trying to access the resources will not load
+ *   - lax (default)    (1)
+ *     -> further driver trying to access the resources will load, but you
+ *     get a system message that something might go wrong...
+ *
+ *   - no               (0)
+ *     -> ACPI Operation Region resources will not be registered
+ *
+ */
+#define ENFORCE_RESOURCES_STRICT 2
+#define ENFORCE_RESOURCES_LAX    1
+#define ENFORCE_RESOURCES_NO     0
+
+static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
+
+static int __init acpi_enforce_resources_setup(char *str)
+{
+	if (str == NULL || *str == '\0')
+		return 0;
+
+	if (!strcmp("strict", str))
+		acpi_enforce_resources = ENFORCE_RESOURCES_STRICT;
+	else if (!strcmp("lax", str))
+		acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
+	else if (!strcmp("no", str))
+		acpi_enforce_resources = ENFORCE_RESOURCES_NO;
+
+	return 1;
+}
+
+__setup("acpi_enforce_resources=", acpi_enforce_resources_setup);
+
+/* Check for resource conflicts between ACPI OperationRegions and native
+ * drivers */
+static int acpi_check_resource_conflict(struct resource *res)
+{
+	struct acpi_res_list *res_list_elem;
+	int ioport;
+	int clash = 0;
+
+	if (acpi_enforce_resources == ENFORCE_RESOURCES_NO)
+		return 0;
+	if (!(res->flags & IORESOURCE_IO) && !(res->flags & IORESOURCE_MEM))
+		return 0;
+
+	ioport = res->flags & IORESOURCE_IO;
+
+	spin_lock(&acpi_res_lock);
+	list_for_each_entry(res_list_elem, &resource_list_head,
+			    resource_list) {
+		if (ioport && (res_list_elem->resource_type
+			       != ACPI_ADR_SPACE_SYSTEM_IO))
+			continue;
+		if (!ioport && (res_list_elem->resource_type
+				!= ACPI_ADR_SPACE_SYSTEM_MEMORY))
+			continue;
+
+		if (res->end < res_list_elem->start
+		    || res_list_elem->end < res->start)
+			continue;
+		clash = 1;
+		break;
+	}
+	spin_unlock(&acpi_res_lock);
+
+	if (clash) {
+		if (acpi_enforce_resources != ENFORCE_RESOURCES_NO) {
+			printk(KERN_INFO "%sACPI: %s resource %s [0x%llx-0x%llx]"
+			       " conflicts with ACPI region %s"
+			       " [0x%llx-0x%llx]\n",
+			       acpi_enforce_resources == ENFORCE_RESOURCES_LAX
+			       ? KERN_WARNING : KERN_ERR,
+			       ioport ? "I/O" : "Memory", res->name,
+			       (long long) res->start, (long long) res->end,
+			       res_list_elem->name,
+			       (long long) res_list_elem->start,
+			       (long long) res_list_elem->end);
+			printk(KERN_INFO "ACPI: Device needs an ACPI driver\n");
+		}
+		if (acpi_enforce_resources == ENFORCE_RESOURCES_STRICT)
+			return -EBUSY;
+	}
+	return 0;
+}
+
+int acpi_check_region(resource_size_t start, resource_size_t n,
+		      const char *name)
+{
+	struct resource res = {
+		.start = start,
+		.end   = start + n - 1,
+		.name  = name,
+		.flags = IORESOURCE_IO,
+	};
+
+	return acpi_check_resource_conflict(&res);
+}
+EXPORT_SYMBOL(acpi_check_region);
+
+int acpi_check_mem_region(resource_size_t start, resource_size_t n,
+		      const char *name)
+{
+	struct resource res = {
+		.start = start,
+		.end   = start + n - 1,
+		.name  = name,
+		.flags = IORESOURCE_MEM,
+	};
+
+	return acpi_check_resource_conflict(&res);
+
+}
+EXPORT_SYMBOL(acpi_check_mem_region);
+
 /*
  * Acquire a spinlock.
  *
@@ -1303,10 +1438,46 @@ acpi_status
 acpi_os_validate_address (
     u8                   space_id,
     acpi_physical_address   address,
-    acpi_size               length)
+    acpi_size               length,
+    char *name)
 {
+	struct acpi_res_list *res;
+	if (acpi_enforce_resources == ENFORCE_RESOURCES_NO)
+		return AE_OK;
 
-    return AE_OK;
+	switch (space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		/* Only interference checks against SystemIO and SytemMemory
+		   are needed */
+		res = kzalloc(sizeof(struct acpi_res_list), GFP_KERNEL);
+		if (!res)
+			return AE_OK;
+		/* ACPI names are fixed to 4 bytes, still better use strlcpy */
+		strlcpy(res->name, name, 5);
+		res->start = address;
+		res->end = address + length - 1;
+		res->resource_type = space_id;
+		spin_lock(&acpi_res_lock);
+		list_add(&res->resource_list, &resource_list_head);
+		spin_unlock(&acpi_res_lock);
+		pr_debug("Added %s resource: start: 0x%llx, end: 0x%llx, "
+			 "name: %s\n", (space_id == ACPI_ADR_SPACE_SYSTEM_IO)
+			 ? "SystemIO" : "System Memory",
+			 (unsigned long long)res->start,
+			 (unsigned long long)res->end,
+			 res->name);
+		break;
+	case ACPI_ADR_SPACE_PCI_CONFIG:
+	case ACPI_ADR_SPACE_EC:
+	case ACPI_ADR_SPACE_SMBUS:
+	case ACPI_ADR_SPACE_CMOS:
+	case ACPI_ADR_SPACE_PCI_BAR_TARGET:
+	case ACPI_ADR_SPACE_DATA_TABLE:
+	case ACPI_ADR_SPACE_FIXED_HARDWARE:
+		break;
+	}
+	return AE_OK;
 }
 
 #endif
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index ca882b8e7d10..c082c7de88a0 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -239,8 +239,8 @@ acpi_status acpi_os_validate_interface(char *interface);
 acpi_status acpi_osi_invalidate(char* interface);
 
 acpi_status
-acpi_os_validate_address(u8 space_id,
-			 acpi_physical_address address, acpi_size length);
+acpi_os_validate_address(u8 space_id, acpi_physical_address address,
+			 acpi_size length, char *name);
 
 u64 acpi_os_get_timer(void);
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 63f2e6ed698f..893f90a5dea9 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -217,6 +217,11 @@ extern int pnpacpi_disabled;
 #define PXM_INVAL	(-1)
 #define NID_INVAL	(-1)
 
+int acpi_check_region(resource_size_t start, resource_size_t n,
+		      const char *name);
+int acpi_check_mem_region(resource_size_t start, resource_size_t n,
+		      const char *name);
+
 #else	/* CONFIG_ACPI */
 
 static inline int acpi_boot_init(void)
@@ -229,5 +234,17 @@ static inline int acpi_boot_table_init(void)
 	return 0;
 }
 
+static inline int acpi_check_region(resource_size_t start, resource_size_t n,
+				    const char *name)
+{
+	return 0;
+}
+
+static inline int acpi_check_mem_region(resource_size_t start,
+					resource_size_t n, const char *name)
+{
+	return 0;
+}
+
 #endif	/* !CONFIG_ACPI */
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit 


From 443dea72d5f428170de6d6e3c4c1a1e2b7632b65 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Mon, 4 Feb 2008 23:31:23 -0800
Subject: ACPI: Export acpi_check_resource_conflict

Export acpi_check_resource_conflict(), sometimes drivers already have
a struct resource at hand so no need to use the wrappers to build a new
one.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c   | 3 ++-
 include/linux/acpi.h | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 222f7b1b66f7..bc1604bfa4db 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1157,7 +1157,7 @@ __setup("acpi_enforce_resources=", acpi_enforce_resources_setup);
 
 /* Check for resource conflicts between ACPI OperationRegions and native
  * drivers */
-static int acpi_check_resource_conflict(struct resource *res)
+int acpi_check_resource_conflict(struct resource *res)
 {
 	struct acpi_res_list *res_list_elem;
 	int ioport;
@@ -1207,6 +1207,7 @@ static int acpi_check_resource_conflict(struct resource *res)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(acpi_check_resource_conflict);
 
 int acpi_check_region(resource_size_t start, resource_size_t n,
 		      const char *name)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 893f90a5dea9..a031df8c83ae 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -25,6 +25,7 @@
 #ifndef _LINUX_ACPI_H
 #define _LINUX_ACPI_H
 
+#include <linux/ioport.h>	/* for struct resource */
 
 #ifdef	CONFIG_ACPI
 
@@ -217,6 +218,8 @@ extern int pnpacpi_disabled;
 #define PXM_INVAL	(-1)
 #define NID_INVAL	(-1)
 
+int acpi_check_resource_conflict(struct resource *res);
+
 int acpi_check_region(resource_size_t start, resource_size_t n,
 		      const char *name);
 int acpi_check_mem_region(resource_size_t start, resource_size_t n,
@@ -234,6 +237,11 @@ static inline int acpi_boot_table_init(void)
 	return 0;
 }
 
+static inline int acpi_check_resource_conflict(struct resource *res)
+{
+	return 0;
+}
+
 static inline int acpi_check_region(resource_size_t start, resource_size_t n,
 				    const char *name)
 {
-- 
cgit 


From 9a0b841586c3c6c846effdbe75885c2ebc0031b0 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 31 Jan 2008 17:35:06 -0800
Subject: cpuidle: Add a poll_idle method

Add a default poll idle state with 0 latency. Provides an option to users
to use poll_idle by using 0 as the latency requirement.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/Kconfig              |  3 +++
 drivers/acpi/processor_idle.c |  4 +++-
 drivers/cpuidle/cpuidle.c     | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/cpuidle.h       | 13 ++++++++++---
 4 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e6728bd61cc1..fd4265007053 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -105,6 +105,9 @@ config GENERIC_TIME_VSYSCALL
 	bool
 	default X86_64
 
+config ARCH_HAS_CPU_RELAX
+	def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool X86_64
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index fea71597b40a..32488e6f76ab 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1628,7 +1628,7 @@ struct cpuidle_driver acpi_idle_driver = {
  */
 static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 {
-	int i, count = 0;
+	int i, count = CPUIDLE_DRIVER_STATE_START;
 	struct acpi_processor_cx *cx;
 	struct cpuidle_state *state;
 	struct cpuidle_device *dev = &pr->power.dev;
@@ -1687,6 +1687,8 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 		}
 
 		count++;
+		if (count == CPUIDLE_STATE_MAX)
+			break;
 	}
 
 	dev->state_count = count;
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 2a98d99cbd46..2c4b2d47973e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -15,6 +15,7 @@
 #include <linux/pm_qos_params.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
+#include <linux/ktime.h>
 
 #include "cpuidle.h"
 
@@ -180,6 +181,44 @@ void cpuidle_disable_device(struct cpuidle_device *dev)
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
 
+#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+	ktime_t	t1, t2;
+	s64 diff;
+	int ret;
+
+	t1 = ktime_get();
+	local_irq_enable();
+	while (!need_resched())
+		cpu_relax();
+
+	t2 = ktime_get();
+	diff = ktime_to_us(ktime_sub(t2, t1));
+	if (diff > INT_MAX)
+		diff = INT_MAX;
+
+	ret = (int) diff;
+	return ret;
+}
+
+static void poll_idle_init(struct cpuidle_device *dev)
+{
+	struct cpuidle_state *state = &dev->states[0];
+
+	cpuidle_set_statedata(state, NULL);
+
+	snprintf(state->name, CPUIDLE_NAME_LEN, "C0 (poll idle)");
+	state->exit_latency = 0;
+	state->target_residency = 0;
+	state->power_usage = -1;
+	state->flags = CPUIDLE_FLAG_POLL | CPUIDLE_FLAG_TIME_VALID;
+	state->enter = poll_idle;
+}
+#else
+static void poll_idle_init(struct cpuidle_device *dev) {}
+#endif /* CONFIG_ARCH_HAS_CPU_RELAX */
+
 /**
  * cpuidle_register_device - registers a CPU's idle PM feature
  * @dev: the cpu
@@ -198,6 +237,8 @@ int cpuidle_register_device(struct cpuidle_device *dev)
 
 	mutex_lock(&cpuidle_lock);
 
+	poll_idle_init(dev);
+
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
 	list_add(&dev->device_list, &cpuidle_detected_devices);
 	if ((ret = cpuidle_add_sysfs(sys_dev))) {
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index b0fd85ab9efb..385d45b616db 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -46,9 +46,10 @@ struct cpuidle_state {
 /* Idle State Flags */
 #define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
 #define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
-#define CPUIDLE_FLAG_SHALLOW	(0x10) /* low latency, minimal savings */
-#define CPUIDLE_FLAG_BALANCED	(0x20) /* medium latency, moderate savings */
-#define CPUIDLE_FLAG_DEEP	(0x40) /* high latency, large savings */
+#define CPUIDLE_FLAG_POLL	(0x10) /* no latency, no savings */
+#define CPUIDLE_FLAG_SHALLOW	(0x20) /* low latency, minimal savings */
+#define CPUIDLE_FLAG_BALANCED	(0x40) /* medium latency, moderate savings */
+#define CPUIDLE_FLAG_DEEP	(0x80) /* high latency, large savings */
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
 
@@ -178,4 +179,10 @@ static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { }
 
 #endif
 
+#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+#define CPUIDLE_DRIVER_STATE_START	1
+#else
+#define CPUIDLE_DRIVER_STATE_START	0
+#endif
+
 #endif /* _LINUX_CPUIDLE_H */
-- 
cgit 


From ee1ce6fcb383ba1da9d16e1ba902f592211aa508 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 6 Dec 2007 22:00:36 -0500
Subject: ACPI: cleanup acpi.h

Two cleanups to <linux/acpi.h>:
* Stop defining acpi_mp_config, it isn't used anywhere.
* Discard nested "#ifdef CONFIG_ACPI", they are useless and
  error-prone.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 63f2e6ed698f..ff68f13a9377 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -43,8 +43,6 @@
 #include <linux/dmi.h>
 
 
-#ifdef CONFIG_ACPI
-
 enum acpi_irq_model_id {
 	ACPI_IRQ_MODEL_PIC = 0,
 	ACPI_IRQ_MODEL_IOAPIC,
@@ -116,20 +114,12 @@ int acpi_unmap_lsapic(int cpu);
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base);
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base);
 
-extern int acpi_mp_config;
-
 extern struct acpi_mcfg_allocation *pci_mmcfg_config;
 extern int pci_mmcfg_config_num;
 
 extern int sbf_port;
 extern unsigned long acpi_realmode_flags;
 
-#else	/* !CONFIG_ACPI */
-
-#define acpi_mp_config	0
-
-#endif 	/* !CONFIG_ACPI */
-
 int acpi_register_gsi (u32 gsi, int triggering, int polarity);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 
@@ -145,8 +135,6 @@ extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity);
  */
 void acpi_unregister_gsi (u32 gsi);
 
-#ifdef CONFIG_ACPI
-
 struct acpi_prt_entry {
 	struct list_head	node;
 	struct acpi_pci_id	id;
@@ -179,8 +167,6 @@ struct acpi_pci_driver {
 int acpi_pci_register_driver(struct acpi_pci_driver *driver);
 void acpi_pci_unregister_driver(struct acpi_pci_driver *driver);
 
-#endif /* CONFIG_ACPI */
-
 #ifdef CONFIG_ACPI_EC
 
 extern int ec_read(u8 addr, u8 *val);
-- 
cgit 


From 9b7131542178f5f948e4bb6bea6e1c545e697b06 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Thu, 7 Feb 2008 04:16:34 -0500
Subject: Revert "cpuidle: build fix for non-x86"

This reverts commit f757397097d0713c949af76dccabb65a2785782e.
which ironically broke the ia64 build
---
 arch/x86/Kconfig          |  3 ---
 drivers/cpuidle/cpuidle.c |  2 +-
 include/linux/cpuidle.h   | 13 -------------
 3 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3954ae96b0c7..fd4265007053 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -98,9 +98,6 @@ config ARCH_HAS_ILOG2_U32
 config ARCH_HAS_ILOG2_U64
 	def_bool n
 
-config ARCH_HAS_CPU_IDLE_WAIT
-	def_bool y
-
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index d868d737742f..2c4b2d47973e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -83,7 +83,7 @@ void cpuidle_uninstall_idle_handler(void)
 {
 	if (enabled_devices && (pm_idle != pm_idle_old)) {
 		pm_idle = pm_idle_old;
-		cpuidle_kick_cpus();
+		cpu_idle_wait();
 	}
 }
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index c8eb8c71809e..385d45b616db 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -73,19 +73,6 @@ cpuidle_set_statedata(struct cpuidle_state *state, void *data)
 	state->driver_data = data;
 }
 
-#ifdef CONFIG_SMP
-#ifdef CONFIG_ARCH_HAS_CPU_IDLE_WAIT
-static inline void cpuidle_kick_cpus(void)
-{
-	cpu_idle_wait();
-}
-#else /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT */
-#error "Arch needs cpu_idle_wait() equivalent here"
-#endif /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT */
-#else /* !CONFIG_SMP */
-static inline void cpuidle_kick_cpus(void) {}
-#endif /* !CONFIG_SMP */
-
 struct cpuidle_state_kobj {
 	struct cpuidle_state *state;
 	struct completion kobj_unregister;
-- 
cgit 


From 24f1a849614ba1805e26a05da7cc8c6bd67490ea Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 2 Jan 2008 17:05:48 -0800
Subject: [POWERPC] Add SPE registers to core dumps

This makes the SPE register data appear in ELF core dumps, using the
new n_type value NT_PPC_SPE (0x101).  This new note type is not used
by any consumers of core files yet, but support can be added.  I don't
even have any hardware with SPE capabilities, so I've never seen such
a note.  But this demonstrates how simple it is to export register
information in core dumps when the user_regset style is used for the
low-level code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/ptrace.c | 2 +-
 include/linux/elf.h          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 7571c2e2d991..7673e9865733 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -581,7 +581,7 @@ static const struct user_regset compat_regsets[] = {
 #endif
 #ifdef CONFIG_SPE
 	[REGSET_SPE] = {
-		.n = 35,
+		.core_note_type = NT_PPC_SPE, .n = 35,
 		.size = sizeof(u32), .align = sizeof(u32),
 		.active = evr_active, .get = evr_get, .set = evr_set
 	},
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 7ceb24d87c1a..30eb6fbd5323 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -355,6 +355,7 @@ typedef struct elf64_shdr {
 #define NT_AUXV		6
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
 #define NT_PPC_VMX	0x100		/* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE	0x101		/* PowerPC SPE/EVR registers */
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
 
 
-- 
cgit 


From 4c79141d28bc290ae307e3f81f5bc909c26faf6e Mon Sep 17 00:00:00 2001
From: Márton Németh <nm127@freemail.hu>
Date: Wed, 31 Oct 2007 15:07:12 +0100
Subject: leds: Add support for hardware accelerated LED flashing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends the leds subsystem with a blink_set() callback function which can
be optionally implemented by a LED driver. If implemented, the driver can use
the hardware acceleration for blinking a LED.

Signed-off-by: Márton Németh <nm127@freemail.hu>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 Documentation/leds-class.txt | 24 ++++++++++++++++++++----
 drivers/leds/Kconfig         |  6 +++++-
 drivers/leds/ledtrig-timer.c | 41 +++++++++++++++++++++++++++++++++++++----
 include/linux/leds.h         |  5 +++++
 4 files changed, 67 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/leds-class.txt b/Documentation/leds-class.txt
index 877af6efc18b..56757c751d6f 100644
--- a/Documentation/leds-class.txt
+++ b/Documentation/leds-class.txt
@@ -48,6 +48,26 @@ above leaves scope for further attributes should they be needed. If sections
 of the name don't apply, just leave that section blank.
 
 
+Hardware accelerated blink of LEDs
+==================================
+
+Some LEDs can be programmed to blink without any CPU interaction. To
+support this feature, a LED driver can optionally implement the
+blink_set() function (see <linux/leds.h>). If implemeted, triggers can
+attempt to use it before falling back to software timers. The blink_set()
+function should return 0 if the blink setting is supported, or -EINVAL
+otherwise, which means that LED blinking will be handled by software.
+
+The blink_set() function should choose a user friendly blinking
+value if it is called with *delay_on==0 && *delay_off==0 parameters. In
+this case the driver should give back the chosen value through delay_on
+and delay_off parameters to the leds subsystem.
+
+Any call to the brightness_set() callback function should cancel the
+previously programmed hardware blinking function so setting the brightness
+to 0 can also cancel the blinking of the LED.
+
+
 Known Issues
 ============
 
@@ -56,10 +76,6 @@ would cause nightmare dependency issues. I see this as a minor issue
 compared to the benefits the simple trigger functionality brings. The
 rest of the LED subsystem can be modular.
 
-Some leds can be programmed to flash in hardware. As this isn't a generic
-LED device property, this should be exported as a device specific sysfs
-attribute rather than part of the class if this functionality is required.
-
 
 Future Development
 ==================
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index cf3a6d4d9475..659448ead685 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -152,7 +152,11 @@ config LEDS_TRIGGER_TIMER
 	depends on LEDS_TRIGGERS
 	help
 	  This allows LEDs to be controlled by a programmable timer
-	  via sysfs. If unsure, say Y.
+	  via sysfs. Some LED hardware can be programmed to start
+	  blinking the LED without any further software interaction.
+	  For more details read Documentation/leds-class.txt.
+
+	  If unsure, say Y.
 
 config LEDS_TRIGGER_IDE_DISK
 	bool "LED IDE Disk Trigger"
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c
index ed9ff02c77ea..82c55d6e4902 100644
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/ledtrig-timer.c
@@ -77,8 +77,21 @@ static ssize_t led_delay_on_store(struct device *dev,
 		count++;
 
 	if (count == size) {
-		timer_data->delay_on = state;
-		mod_timer(&timer_data->timer, jiffies + 1);
+		if (timer_data->delay_on != state) {
+			/* the new value differs from the previous */
+			timer_data->delay_on = state;
+
+			/* deactivate previous settings */
+			del_timer_sync(&timer_data->timer);
+
+			/* try to activate hardware acceleration, if any */
+			if (!led_cdev->blink_set ||
+			    led_cdev->blink_set(led_cdev,
+				&timer_data->delay_on, &timer_data->delay_off)) {
+				/* no hardware acceleration, blink via timer */
+				mod_timer(&timer_data->timer, jiffies + 1);
+			}
+		}
 		ret = count;
 	}
 
@@ -110,8 +123,21 @@ static ssize_t led_delay_off_store(struct device *dev,
 		count++;
 
 	if (count == size) {
-		timer_data->delay_off = state;
-		mod_timer(&timer_data->timer, jiffies + 1);
+		if (timer_data->delay_off != state) {
+			/* the new value differs from the previous */
+			timer_data->delay_off = state;
+
+			/* deactivate previous settings */
+			del_timer_sync(&timer_data->timer);
+
+			/* try to activate hardware acceleration, if any */
+			if (!led_cdev->blink_set ||
+			    led_cdev->blink_set(led_cdev,
+				&timer_data->delay_on, &timer_data->delay_off)) {
+				/* no hardware acceleration, blink via timer */
+				mod_timer(&timer_data->timer, jiffies + 1);
+			}
+		}
 		ret = count;
 	}
 
@@ -143,6 +169,13 @@ static void timer_trig_activate(struct led_classdev *led_cdev)
 	if (rc)
 		goto err_out_delayon;
 
+	/* If there is hardware support for blinking, start one
+	 * user friendly blink rate chosen by the driver.
+	 */
+	if (led_cdev->blink_set)
+		led_cdev->blink_set(led_cdev,
+			&timer_data->delay_on, &timer_data->delay_off);
+
 	return;
 
 err_out_delayon:
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 00f89fd6c52a..0201f6f51cea 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -38,6 +38,11 @@ struct led_classdev {
 	void		(*brightness_set)(struct led_classdev *led_cdev,
 					  enum led_brightness brightness);
 
+	/* Activate hardware accelerated blink */
+	int		(*blink_set)(struct led_classdev *led_cdev,
+				     unsigned long *delay_on,
+				     unsigned long *delay_off);
+
 	struct device		*dev;
 	struct list_head	 node;			/* LED Device list */
 	char			*default_trigger;	/* Trigger to use */
-- 
cgit 


From 388bbb09b991c792310af2f6b49f6c55edb3dff0 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Wed, 6 Feb 2008 10:17:15 +0000
Subject: [MTD] Add mtd panic_write function pointer

MTDs are well suited for logging critical data and the mtdoops driver
allows kernel panics/oops to be written to flash in a blackbox flight
recorder fashion allowing better debugging and analysis of crashes.

Any kernel oops in user context can be easily handled since the kernel
continues as normal and any queued mtd writes are scheduled. Any kernel
oops in interrupt context results in a panic and the delayed writes will
not be scheduled however. The existing mtd->write function cannot be
called in interrupt context so these messages can never be written to
flash.

This patch adds a panic_write function pointer that drivers can
optionally implement which can be called in interrupt context. It is
only intended to be called when its known the kernel is about to panic
and we need to write to succeed. Since the kernel is not going to be
running for much longer, this function can break locks and delay to
ensure the write succeeds (but not sleep).

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/mtdpart.c   | 17 +++++++++++++++++
 include/linux/mtd/mtd.h |  9 +++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 6174a97d7902..c66902df3171 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -151,6 +151,20 @@ static int part_write (struct mtd_info *mtd, loff_t to, size_t len,
 				    len, retlen, buf);
 }
 
+static int part_panic_write (struct mtd_info *mtd, loff_t to, size_t len,
+			size_t *retlen, const u_char *buf)
+{
+	struct mtd_part *part = PART(mtd);
+	if (!(mtd->flags & MTD_WRITEABLE))
+		return -EROFS;
+	if (to >= mtd->size)
+		len = 0;
+	else if (to + len > mtd->size)
+		len = mtd->size - to;
+	return part->master->panic_write (part->master, to + part->offset,
+				    len, retlen, buf);
+}
+
 static int part_write_oob(struct mtd_info *mtd, loff_t to,
 			 struct mtd_oob_ops *ops)
 {
@@ -352,6 +366,9 @@ int add_mtd_partitions(struct mtd_info *master,
 		slave->mtd.read = part_read;
 		slave->mtd.write = part_write;
 
+		if (master->panic_write)
+			slave->mtd.panic_write = part_panic_write;
+
 		if(master->point && master->unpoint){
 			slave->mtd.point = part_point;
 			slave->mtd.unpoint = part_unpoint;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 783fc983417c..0a13bb35f044 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -152,6 +152,15 @@ struct mtd_info {
 	int (*read) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
 
+	/* In blackbox flight recorder like scenarios we want to make successful
+	   writes in interrupt context. panic_write() is only intended to be
+	   called when its known the kernel is about to panic and we need the
+	   write to succeed. Since the kernel is not going to be running for much
+	   longer, this function can break locks and delay to ensure the write
+	   succeeds (but not sleep). */
+
+	int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
+
 	int (*read_oob) (struct mtd_info *mtd, loff_t from,
 			 struct mtd_oob_ops *ops);
 	int (*write_oob) (struct mtd_info *mtd, loff_t to,
-- 
cgit 


From 4aa323bd839604dd83aec56ed3a88df352c3339d Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 7 Feb 2008 00:13:22 -0800
Subject: DS1WM: decouple host IRQ and INTR active state settings

The DS1WM driver incorrectly infers the IAS bit (1-wire interrupt active
high) from IRQ settings.  There are devices that have IAS=0 but still need
the IRQ to trigger on a rising edge.  With this patch, machines with DS1WM
that need IAS=1 have to set .active_high=1 in the ds1wm_platform_data.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Acked-by: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/w1/masters/ds1wm.c | 9 +++++----
 include/linux/ds1wm.h      | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c
index 5747997f8d7d..688e435b4d9a 100644
--- a/drivers/w1/masters/ds1wm.c
+++ b/drivers/w1/masters/ds1wm.c
@@ -361,11 +361,12 @@ static int ds1wm_probe(struct platform_device *pdev)
 		goto err1;
 	}
 	ds1wm_data->irq = res->start;
-	ds1wm_data->active_high = (res->flags & IORESOURCE_IRQ_HIGHEDGE) ?
-		1 : 0;
+	ds1wm_data->active_high = plat->active_high;
 
-	set_irq_type(ds1wm_data->irq, ds1wm_data->active_high ?
-			IRQ_TYPE_EDGE_RISING : IRQ_TYPE_EDGE_FALLING);
+	if (res->flags & IORESOURCE_IRQ_HIGHEDGE)
+		set_irq_type(ds1wm_data->irq, IRQ_TYPE_EDGE_RISING);
+	if (res->flags & IORESOURCE_IRQ_LOWEDGE)
+		set_irq_type(ds1wm_data->irq, IRQ_TYPE_EDGE_FALLING);
 
 	ret = request_irq(ds1wm_data->irq, ds1wm_isr, IRQF_DISABLED,
 			  "ds1wm", ds1wm_data);
diff --git a/include/linux/ds1wm.h b/include/linux/ds1wm.h
index 31f6e3c427fb..d3c65e48a2e7 100644
--- a/include/linux/ds1wm.h
+++ b/include/linux/ds1wm.h
@@ -6,6 +6,7 @@ struct ds1wm_platform_data {
 			     * e.g. on h5xxx and h2200 this is 2
 			     * (registers aligned to 4-byte boundaries),
 			     * while on hx4700 this is 1 */
+	int active_high;
 	void (*enable)(struct platform_device *pdev);
 	void (*disable)(struct platform_device *pdev);
 };
-- 
cgit 


From deb21db7788b97a2bccdefe605433ef97f482689 Mon Sep 17 00:00:00 2001
From: Erez Zadok <ezk@cs.sunysb.edu>
Date: Thu, 7 Feb 2008 00:13:25 -0800
Subject: VFS: swap do_ioctl and vfs_ioctl names

Rename old vfs_ioctl to do_ioctl, because the comment above it clearly
indicates that it is an internal function not to be exported to modules;
therefore it should have a more traditional do_XXX name.  The new do_ioctl
is exported in fs.h but not to modules.

Rename the old do_ioctl to vfs_ioctl because the names vfs_XXX should
preferably be reserved to callable VFS functions which modules may call, as
many other vfs_XXX functions already do.  Export the new vfs_ioctl to GPL
modules so others can use it (including Unionfs and eCryptfs).  Add DocBook
for new vfs_ioctl.

[akpm@linux-foundation.org: fix build]
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat_ioctl.c  |  2 +-
 fs/ioctl.c         | 28 ++++++++++++++++++++--------
 include/linux/fs.h |  4 +++-
 3 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index ffdc022cae64..614bd75b5a4a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2986,7 +2986,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 	}
 
  do_ioctl:
-	error = vfs_ioctl(filp, fd, cmd, arg);
+	error = do_vfs_ioctl(filp, fd, cmd, arg);
  out_fput:
 	fput_light(filp, fput_needed);
  out:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 652cacf433ff..e6500cd12258 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -16,8 +16,20 @@
 
 #include <asm/ioctls.h>
 
-static long do_ioctl(struct file *filp, unsigned int cmd,
-		unsigned long arg)
+/**
+ * vfs_ioctl - call filesystem specific ioctl methods
+ * @filp: [in]     open file to invoke ioctl method on
+ * @cmd:  [in]     ioctl command to execute
+ * @arg:  [in/out] command-specific argument for ioctl
+ *
+ * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise
+ * invokes * filesystem specific ->ioctl method.  If neither method exists,
+ * returns -ENOTTY.
+ *
+ * Returns 0 on success, -errno on error.
+ */
+long vfs_ioctl(struct file *filp, unsigned int cmd,
+	       unsigned long arg)
 {
 	int error = -ENOTTY;
 
@@ -72,18 +84,18 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 		return put_user(i_size_read(inode) - filp->f_pos, p);
 	}
 
-	return do_ioctl(filp, cmd, arg);
+	return vfs_ioctl(filp, cmd, arg);
 }
 
 /*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
- * vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
+ * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
  * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
  */
-int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
-	      unsigned long arg)
+int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
+	     unsigned long arg)
 {
 	unsigned int flag;
 	int on, error = 0;
@@ -152,7 +164,7 @@ int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
 		else
-			error = do_ioctl(filp, cmd, arg);
+			error = vfs_ioctl(filp, cmd, arg);
 		break;
 	}
 	return error;
@@ -172,7 +184,7 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 	if (error)
 		goto out_fput;
 
-	error = vfs_ioctl(filp, fd, cmd, arg);
+	error = do_vfs_ioctl(filp, fd, cmd, arg);
  out_fput:
 	fput_light(filp, fput_needed);
  out:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 109734bf6377..2925f7011ece 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1941,7 +1941,9 @@ extern int vfs_stat_fd(int dfd, char __user *, struct kstat *);
 extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *);
 extern int vfs_fstat(unsigned int, struct kstat *);
 
-extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
+extern long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
+		    unsigned long arg);
 
 extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
-- 
cgit 


From 3dddbfc30106280d98a5752b6c622f65e5eb3663 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 7 Feb 2008 00:13:28 -0800
Subject: tty: Kill TTY_FLIPBUF_SIZE

This legacy define from the old buffer code is now only used in a single
power pc driver than doesn't compile anyway.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5824a9777ad7..dd8e08fe8855 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -53,13 +53,6 @@
  */
 #define __DISABLED_CHAR '\0'
 
-/*
- * This is the flip buffer used for the tty driver.  The buffer is
- * located in the tty structure, and is used as a high speed interface
- * between the tty driver and the tty line discipline.
- */
-#define TTY_FLIPBUF_SIZE 512
-
 struct tty_buffer {
 	struct tty_buffer *next;
 	char *char_buf_ptr;
-- 
cgit 


From e552b6617067ab785256dcec5ca29eeea981aacb Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Thu, 7 Feb 2008 00:13:49 -0800
Subject: Memory controller: resource counters

With fixes from David Rientjes <rientjes@google.com>

Introduce generic structures and routines for resource accounting.

Each resource accounting cgroup is supposed to aggregate it,
cgroup_subsystem_state and its resource-specific members within.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 102 +++++++++++++++++++++++++++++++++++++
 init/Kconfig                |   7 +++
 kernel/Makefile             |   1 +
 kernel/res_counter.c        | 120 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 230 insertions(+)
 create mode 100644 include/linux/res_counter.h
 create mode 100644 kernel/res_counter.c

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
new file mode 100644
index 000000000000..eeb3f7749772
--- /dev/null
+++ b/include/linux/res_counter.h
@@ -0,0 +1,102 @@
+#ifndef __RES_COUNTER_H__
+#define __RES_COUNTER_H__
+
+/*
+ * Resource Counters
+ * Contain common data types and routines for resource accounting
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#include <linux/cgroup.h>
+
+/*
+ * The core object. the cgroup that wishes to account for some
+ * resource may include this counter into its structures and use
+ * the helpers described beyond
+ */
+
+struct res_counter {
+	/*
+	 * the current resource consumption level
+	 */
+	unsigned long usage;
+	/*
+	 * the limit that usage cannot exceed
+	 */
+	unsigned long limit;
+	/*
+	 * the number of unsuccessful attempts to consume the resource
+	 */
+	unsigned long failcnt;
+	/*
+	 * the lock to protect all of the above.
+	 * the routines below consider this to be IRQ-safe
+	 */
+	spinlock_t lock;
+};
+
+/*
+ * Helpers to interact with userspace
+ * res_counter_read/_write - put/get the specified fields from the
+ * res_counter struct to/from the user
+ *
+ * @counter:     the counter in question
+ * @member:  the field to work with (see RES_xxx below)
+ * @buf:     the buffer to opeate on,...
+ * @nbytes:  its size...
+ * @pos:     and the offset.
+ */
+
+ssize_t res_counter_read(struct res_counter *counter, int member,
+		const char __user *buf, size_t nbytes, loff_t *pos);
+ssize_t res_counter_write(struct res_counter *counter, int member,
+		const char __user *buf, size_t nbytes, loff_t *pos);
+
+/*
+ * the field descriptors. one for each member of res_counter
+ */
+
+enum {
+	RES_USAGE,
+	RES_LIMIT,
+	RES_FAILCNT,
+};
+
+/*
+ * helpers for accounting
+ */
+
+void res_counter_init(struct res_counter *counter);
+
+/*
+ * charge - try to consume more resource.
+ *
+ * @counter: the counter
+ * @val: the amount of the resource. each controller defines its own
+ *       units, e.g. numbers, bytes, Kbytes, etc
+ *
+ * returns 0 on success and <0 if the counter->usage will exceed the
+ * counter->limit _locked call expects the counter->lock to be taken
+ */
+
+int res_counter_charge_locked(struct res_counter *counter, unsigned long val);
+int res_counter_charge(struct res_counter *counter, unsigned long val);
+
+/*
+ * uncharge - tell that some portion of the resource is released
+ *
+ * @counter: the counter
+ * @val: the amount of the resource
+ *
+ * these calls check for usage underflow and show a warning on the console
+ * _locked call expects the counter->lock to be taken
+ */
+
+void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index 92b23e256614..d372bd616b0c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -369,6 +369,13 @@ config CGROUP_CPUACCT
 	  Provides a simple Resource Controller for monitoring the
 	  total CPU consumed by the tasks in a cgroup
 
+config RESOURCE_COUNTERS
+	bool "Resource counters"
+	help
+	  This option enables controller independent resource accounting
+          infrastructure that works with cgroups
+	depends on CGROUPS
+
 config SYSFS_DEPRECATED
 	bool "Create deprecated sysfs files"
 	depends on SYSFS
diff --git a/kernel/Makefile b/kernel/Makefile
index 135a1b943446..685697c0a181 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
+obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
new file mode 100644
index 000000000000..722c484b068b
--- /dev/null
+++ b/kernel/res_counter.c
@@ -0,0 +1,120 @@
+/*
+ * resource cgroups
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/parser.h>
+#include <linux/fs.h>
+#include <linux/res_counter.h>
+#include <linux/uaccess.h>
+
+void res_counter_init(struct res_counter *counter)
+{
+	spin_lock_init(&counter->lock);
+	counter->limit = (unsigned long)LONG_MAX;
+}
+
+int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
+{
+	if (counter->usage + val > counter->limit) {
+		counter->failcnt++;
+		return -ENOMEM;
+	}
+
+	counter->usage += val;
+	return 0;
+}
+
+int res_counter_charge(struct res_counter *counter, unsigned long val)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&counter->lock, flags);
+	ret = res_counter_charge_locked(counter, val);
+	spin_unlock_irqrestore(&counter->lock, flags);
+	return ret;
+}
+
+void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
+{
+	if (WARN_ON(counter->usage < val))
+		val = counter->usage;
+
+	counter->usage -= val;
+}
+
+void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&counter->lock, flags);
+	res_counter_uncharge_locked(counter, val);
+	spin_unlock_irqrestore(&counter->lock, flags);
+}
+
+
+static inline unsigned long *res_counter_member(struct res_counter *counter,
+						int member)
+{
+	switch (member) {
+	case RES_USAGE:
+		return &counter->usage;
+	case RES_LIMIT:
+		return &counter->limit;
+	case RES_FAILCNT:
+		return &counter->failcnt;
+	};
+
+	BUG();
+	return NULL;
+}
+
+ssize_t res_counter_read(struct res_counter *counter, int member,
+		const char __user *userbuf, size_t nbytes, loff_t *pos)
+{
+	unsigned long *val;
+	char buf[64], *s;
+
+	s = buf;
+	val = res_counter_member(counter, member);
+	s += sprintf(s, "%lu\n", *val);
+	return simple_read_from_buffer((void __user *)userbuf, nbytes,
+			pos, buf, s - buf);
+}
+
+ssize_t res_counter_write(struct res_counter *counter, int member,
+		const char __user *userbuf, size_t nbytes, loff_t *pos)
+{
+	int ret;
+	char *buf, *end;
+	unsigned long tmp, *val;
+
+	buf = kmalloc(nbytes + 1, GFP_KERNEL);
+	ret = -ENOMEM;
+	if (buf == NULL)
+		goto out;
+
+	buf[nbytes] = '\0';
+	ret = -EFAULT;
+	if (copy_from_user(buf, userbuf, nbytes))
+		goto out_free;
+
+	ret = -EINVAL;
+	tmp = simple_strtoul(buf, &end, 10);
+	if (*end != '\0')
+		goto out_free;
+
+	val = res_counter_member(counter, member);
+	*val = tmp;
+	ret = nbytes;
+out_free:
+	kfree(buf);
+out:
+	return ret;
+}
-- 
cgit 


From 8cdea7c05454260c0d4d83503949c358eb131d17 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:13:50 -0800
Subject: Memory controller: cgroups setup

Setup the memory cgroup and add basic hooks and controls to integrate
and work with the cgroup.

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup_subsys.h |   5 ++
 include/linux/memcontrol.h    |  21 +++++++
 init/Kconfig                  |   7 +++
 mm/Makefile                   |   1 +
 mm/memcontrol.c               | 127 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 161 insertions(+)
 create mode 100644 include/linux/memcontrol.h
 create mode 100644 mm/memcontrol.c

(limited to 'include/linux')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9ec43186ba80..228235c5ae53 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -37,3 +37,8 @@ SUBSYS(cpuacct)
 
 /* */
 
+#ifdef CONFIG_CGROUP_MEM_CONT
+SUBSYS(mem_cgroup)
+#endif
+
+/* */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
new file mode 100644
index 000000000000..3f121b27677a
--- /dev/null
+++ b/include/linux/memcontrol.h
@@ -0,0 +1,21 @@
+/* memcontrol.h - Memory Controller
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_MEMCONTROL_H
+#define _LINUX_MEMCONTROL_H
+
+#endif /* _LINUX_MEMCONTROL_H */
+
diff --git a/init/Kconfig b/init/Kconfig
index d372bd616b0c..95ac2657b0f4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -397,6 +397,13 @@ config SYSFS_DEPRECATED
 	  If you are using a distro that was released in 2006 or later,
 	  it should be safe to say N here.
 
+config CGROUP_MEM_CONT
+	bool "Memory controller for cgroups"
+	depends on CGROUPS && RESOURCE_COUNTERS
+	help
+	  Provides a memory controller that manages both page cache and
+	  RSS memory.
+
 config PROC_PID_CPUSET
 	bool "Include legacy /proc/<pid>/cpuset file"
 	depends on CPUSETS
diff --git a/mm/Makefile b/mm/Makefile
index 4af5dff37277..9f117bab5322 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -32,4 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
+obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
new file mode 100644
index 000000000000..80e48cd9d0c7
--- /dev/null
+++ b/mm/memcontrol.c
@@ -0,0 +1,127 @@
+/* memcontrol.c - Memory Controller
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/res_counter.h>
+#include <linux/memcontrol.h>
+#include <linux/cgroup.h>
+
+struct cgroup_subsys mem_cgroup_subsys;
+
+/*
+ * The memory controller data structure. The memory controller controls both
+ * page cache and RSS per cgroup. We would eventually like to provide
+ * statistics based on the statistics developed by Rik Van Riel for clock-pro,
+ * to help the administrator determine what knobs to tune.
+ *
+ * TODO: Add a water mark for the memory controller. Reclaim will begin when
+ * we hit the water mark.
+ */
+struct mem_cgroup {
+	struct cgroup_subsys_state css;
+	/*
+	 * the counter to account for memory usage
+	 */
+	struct res_counter res;
+};
+
+/*
+ * A page_cgroup page is associated with every page descriptor. The
+ * page_cgroup helps us identify information about the cgroup
+ */
+struct page_cgroup {
+	struct list_head lru;		/* per cgroup LRU list */
+	struct page *page;
+	struct mem_cgroup *mem_cgroup;
+};
+
+
+static inline
+struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont,
+				mem_cgroup_subsys_id), struct mem_cgroup,
+				css);
+}
+
+static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
+			struct file *file, char __user *userbuf, size_t nbytes,
+			loff_t *ppos)
+{
+	return res_counter_read(&mem_cgroup_from_cont(cont)->res,
+				cft->private, userbuf, nbytes, ppos);
+}
+
+static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+				struct file *file, const char __user *userbuf,
+				size_t nbytes, loff_t *ppos)
+{
+	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
+				cft->private, userbuf, nbytes, ppos);
+}
+
+static struct cftype mem_cgroup_files[] = {
+	{
+		.name = "usage",
+		.private = RES_USAGE,
+		.read = mem_cgroup_read,
+	},
+	{
+		.name = "limit",
+		.private = RES_LIMIT,
+		.write = mem_cgroup_write,
+		.read = mem_cgroup_read,
+	},
+	{
+		.name = "failcnt",
+		.private = RES_FAILCNT,
+		.read = mem_cgroup_read,
+	},
+};
+
+static struct cgroup_subsys_state *
+mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct mem_cgroup *mem;
+
+	mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	res_counter_init(&mem->res);
+	return &mem->css;
+}
+
+static void mem_cgroup_destroy(struct cgroup_subsys *ss,
+				struct cgroup *cont)
+{
+	kfree(mem_cgroup_from_cont(cont));
+}
+
+static int mem_cgroup_populate(struct cgroup_subsys *ss,
+				struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, mem_cgroup_files,
+					ARRAY_SIZE(mem_cgroup_files));
+}
+
+struct cgroup_subsys mem_cgroup_subsys = {
+	.name = "memory",
+	.subsys_id = mem_cgroup_subsys_id,
+	.create = mem_cgroup_create,
+	.destroy = mem_cgroup_destroy,
+	.populate = mem_cgroup_populate,
+	.early_init = 0,
+};
-- 
cgit 


From 78fb74669e80883323391090e4d26d17fe29488f Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Thu, 7 Feb 2008 00:13:51 -0800
Subject: Memory controller: accounting setup

Basic setup routines, the mm_struct has a pointer to the cgroup that
it belongs to and the the page has a page_cgroup associated with it.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 36 +++++++++++++++++++++++++++++
 include/linux/mm_types.h   |  6 +++++
 include/linux/sched.h      |  1 +
 kernel/fork.c              | 11 ++++++---
 mm/memcontrol.c            | 57 ++++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 104 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3f121b27677a..7d1f119c796e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -3,6 +3,9 @@
  * Copyright IBM Corporation, 2007
  * Author Balbir Singh <balbir@linux.vnet.ibm.com>
  *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -17,5 +20,38 @@
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
 
+struct mem_cgroup;
+struct page_cgroup;
+
+#ifdef CONFIG_CGROUP_MEM_CONT
+
+extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
+extern void mm_free_cgroup(struct mm_struct *mm);
+extern void page_assign_page_cgroup(struct page *page,
+					struct page_cgroup *pc);
+extern struct page_cgroup *page_get_page_cgroup(struct page *page);
+
+#else /* CONFIG_CGROUP_MEM_CONT */
+static inline void mm_init_cgroup(struct mm_struct *mm,
+					struct task_struct *p)
+{
+}
+
+static inline void mm_free_cgroup(struct mm_struct *mm)
+{
+}
+
+static inline void page_assign_page_cgroup(struct page *page,
+						struct page_cgroup *pc)
+{
+}
+
+static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_MEM_CONT */
+
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f4c03e0b355e..34023c65d466 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -88,6 +88,9 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+#ifdef CONFIG_CGROUP_MEM_CONT
+	unsigned long page_cgroup;
+#endif
 };
 
 /*
@@ -219,6 +222,9 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+#ifdef CONFIG_CGROUP_MEM_CONT
+	struct mem_cgroup *mem_cgroup;
+#endif
 };
 
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7c8ca05c3cae..8a4812c1c038 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -92,6 +92,7 @@ struct sched_param {
 
 #include <asm/processor.h>
 
+struct mem_cgroup;
 struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
diff --git a/kernel/fork.c b/kernel/fork.c
index 3995297567a9..b2ef8e4fad70 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/memcontrol.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -340,7 +341,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
 #include <linux/init_task.h>
 
-static struct mm_struct * mm_init(struct mm_struct * mm)
+static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
@@ -357,11 +358,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 	mm->ioctx_list = NULL;
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	mm_init_cgroup(mm, p);
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
 		return mm;
 	}
+
+	mm_free_cgroup(mm);
 	free_mm(mm);
 	return NULL;
 }
@@ -376,7 +380,7 @@ struct mm_struct * mm_alloc(void)
 	mm = allocate_mm();
 	if (mm) {
 		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm);
+		mm = mm_init(mm, current);
 	}
 	return mm;
 }
@@ -390,6 +394,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
 	mm_free_pgd(mm);
+	mm_free_cgroup(mm);
 	destroy_context(mm);
 	free_mm(mm);
 }
@@ -511,7 +516,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 	mm->token_priority = 0;
 	mm->last_interval = 0;
 
-	if (!mm_init(mm))
+	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
 	if (init_new_context(tsk, mm))
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 80e48cd9d0c7..4d4805eb37c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3,6 +3,9 @@
  * Copyright IBM Corporation, 2007
  * Author Balbir Singh <balbir@linux.vnet.ibm.com>
  *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -17,6 +20,7 @@
 #include <linux/res_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
+#include <linux/mm.h>
 
 struct cgroup_subsys mem_cgroup_subsys;
 
@@ -35,6 +39,13 @@ struct mem_cgroup {
 	 * the counter to account for memory usage
 	 */
 	struct res_counter res;
+	/*
+	 * Per cgroup active and inactive list, similar to the
+	 * per zone LRU lists.
+	 * TODO: Consider making these lists per zone
+	 */
+	struct list_head active_list;
+	struct list_head inactive_list;
 };
 
 /*
@@ -56,6 +67,37 @@ struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 				css);
 }
 
+static inline
+struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
+{
+	return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
+				struct mem_cgroup, css);
+}
+
+void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
+{
+	struct mem_cgroup *mem;
+
+	mem = mem_cgroup_from_task(p);
+	css_get(&mem->css);
+	mm->mem_cgroup = mem;
+}
+
+void mm_free_cgroup(struct mm_struct *mm)
+{
+	css_put(&mm->mem_cgroup->css);
+}
+
+void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	page->page_cgroup = (unsigned long)pc;
+}
+
+struct page_cgroup *page_get_page_cgroup(struct page *page)
+{
+	return page->page_cgroup;
+}
+
 static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
 			struct file *file, char __user *userbuf, size_t nbytes,
 			loff_t *ppos)
@@ -91,14 +133,21 @@ static struct cftype mem_cgroup_files[] = {
 	},
 };
 
+static struct mem_cgroup init_mem_cgroup;
+
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	struct mem_cgroup *mem;
 
-	mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
-	if (!mem)
-		return -ENOMEM;
+	if (unlikely((cont->parent) == NULL)) {
+		mem = &init_mem_cgroup;
+		init_mm.mem_cgroup = mem;
+	} else
+		mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
+
+	if (mem == NULL)
+		return NULL;
 
 	res_counter_init(&mem->res);
 	return &mem->css;
@@ -123,5 +172,5 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.create = mem_cgroup_create,
 	.destroy = mem_cgroup_destroy,
 	.populate = mem_cgroup_populate,
-	.early_init = 0,
+	.early_init = 1,
 };
-- 
cgit 


From 8a9f3ccd24741b50200c3f33d62534c7271f3dfc Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:13:53 -0800
Subject: Memory controller: memory accounting

Add the accounting hooks.  The accounting is carried out for RSS and Page
Cache (unmapped) pages.  There is now a common limit and accounting for both.
The RSS accounting is accounted at page_add_*_rmap() and page_remove_rmap()
time.  Page cache is accounted at add_to_page_cache(),
__delete_from_page_cache().  Swap cache is also accounted for.

Each page's page_cgroup is protected with the last bit of the
page_cgroup pointer, this makes handling of race conditions involving
simultaneous mappings of a page easier.  A reference count is kept in the
page_cgroup to deal with cases where a page might be unmapped from the RSS
of all tasks, but still lives in the page cache.

Credits go to Vaidyanathan Srinivasan for helping with reference counting work
of the page cgroup.  Almost all of the page cache accounting code has help
from Vaidyanathan Srinivasan.

[hugh@veritas.com: fix swapoff breakage]
[akpm@linux-foundation.org: fix locking]
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: <Valdis.Kletnieks@vt.edu>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  20 ++++++
 mm/filemap.c               |  12 +++-
 mm/memcontrol.c            | 166 ++++++++++++++++++++++++++++++++++++++++++++-
 mm/memory.c                |  47 +++++++++++--
 mm/migrate.c               |   6 ++
 mm/page_alloc.c            |   3 +
 mm/rmap.c                  |  17 ++++-
 mm/swap_state.c            |  10 +++
 mm/swapfile.c              |  41 ++++++-----
 9 files changed, 295 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7d1f119c796e..f5b47efab48b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -30,6 +30,13 @@ extern void mm_free_cgroup(struct mm_struct *mm);
 extern void page_assign_page_cgroup(struct page *page,
 					struct page_cgroup *pc);
 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
+extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm);
+extern void mem_cgroup_uncharge(struct page_cgroup *pc);
+
+static inline void mem_cgroup_uncharge_page(struct page *page)
+{
+	mem_cgroup_uncharge(page_get_page_cgroup(page));
+}
 
 #else /* CONFIG_CGROUP_MEM_CONT */
 static inline void mm_init_cgroup(struct mm_struct *mm,
@@ -51,6 +58,19 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
 	return NULL;
 }
 
+static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mem_cgroup_uncharge(struct page_cgroup *pc)
+{
+}
+
+static inline void mem_cgroup_uncharge_page(struct page *page)
+{
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 81fb9bff0d4f..b7a01e927953 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/memcontrol.h>
 #include "internal.h"
 
 /*
@@ -118,6 +119,7 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	mem_cgroup_uncharge_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -461,6 +463,11 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 
 	if (error == 0) {
+
+		error = mem_cgroup_charge(page, current->mm);
+		if (error)
+			goto out;
+
 		write_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
 		if (!error) {
@@ -470,10 +477,13 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 			page->index = offset;
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
-		}
+		} else
+			mem_cgroup_uncharge_page(page);
+
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	}
+out:
 	return error;
 }
 EXPORT_SYMBOL(add_to_page_cache);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4d4805eb37c7..ebca767292dc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -21,6 +21,9 @@
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/bit_spinlock.h>
+#include <linux/rcupdate.h>
 
 struct cgroup_subsys mem_cgroup_subsys;
 
@@ -31,7 +34,9 @@ struct cgroup_subsys mem_cgroup_subsys;
  * to help the administrator determine what knobs to tune.
  *
  * TODO: Add a water mark for the memory controller. Reclaim will begin when
- * we hit the water mark.
+ * we hit the water mark. May be even add a low water mark, such that
+ * no reclaim occurs from a cgroup at it's low water mark, this is
+ * a feature that will be implemented much later in the future.
  */
 struct mem_cgroup {
 	struct cgroup_subsys_state css;
@@ -48,6 +53,14 @@ struct mem_cgroup {
 	struct list_head inactive_list;
 };
 
+/*
+ * We use the lower bit of the page->page_cgroup pointer as a bit spin
+ * lock. We need to ensure that page->page_cgroup is atleast two
+ * byte aligned (based on comments from Nick Piggin)
+ */
+#define PAGE_CGROUP_LOCK_BIT 	0x0
+#define PAGE_CGROUP_LOCK 		(1 << PAGE_CGROUP_LOCK_BIT)
+
 /*
  * A page_cgroup page is associated with every page descriptor. The
  * page_cgroup helps us identify information about the cgroup
@@ -56,6 +69,8 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
+	atomic_t ref_cnt;		/* Helpful when pages move b/w  */
+					/* mapped and cached states     */
 };
 
 
@@ -88,14 +103,157 @@ void mm_free_cgroup(struct mm_struct *mm)
 	css_put(&mm->mem_cgroup->css);
 }
 
+static inline int page_cgroup_locked(struct page *page)
+{
+	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT,
+					&page->page_cgroup);
+}
+
 void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
 {
-	page->page_cgroup = (unsigned long)pc;
+	int locked;
+
+	/*
+	 * While resetting the page_cgroup we might not hold the
+	 * page_cgroup lock. free_hot_cold_page() is an example
+	 * of such a scenario
+	 */
+	if (pc)
+		VM_BUG_ON(!page_cgroup_locked(page));
+	locked = (page->page_cgroup & PAGE_CGROUP_LOCK);
+	page->page_cgroup = ((unsigned long)pc | locked);
 }
 
 struct page_cgroup *page_get_page_cgroup(struct page *page)
 {
-	return page->page_cgroup;
+	return (struct page_cgroup *)
+		(page->page_cgroup & ~PAGE_CGROUP_LOCK);
+}
+
+void __always_inline lock_page_cgroup(struct page *page)
+{
+	bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
+	VM_BUG_ON(!page_cgroup_locked(page));
+}
+
+void __always_inline unlock_page_cgroup(struct page *page)
+{
+	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
+}
+
+/*
+ * Charge the memory controller for page usage.
+ * Return
+ * 0 if the charge was successful
+ * < 0 if the cgroup is over its limit
+ */
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
+{
+	struct mem_cgroup *mem;
+	struct page_cgroup *pc, *race_pc;
+
+	/*
+	 * Should page_cgroup's go to their own slab?
+	 * One could optimize the performance of the charging routine
+	 * by saving a bit in the page_flags and using it as a lock
+	 * to see if the cgroup page already has a page_cgroup associated
+	 * with it
+	 */
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	/*
+	 * The page_cgroup exists and the page has already been accounted
+	 */
+	if (pc) {
+		atomic_inc(&pc->ref_cnt);
+		goto done;
+	}
+
+	unlock_page_cgroup(page);
+
+	pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL);
+	if (pc == NULL)
+		goto err;
+
+	rcu_read_lock();
+	/*
+	 * We always charge the cgroup the mm_struct belongs to
+	 * the mm_struct's mem_cgroup changes on task migration if the
+	 * thread group leader migrates. It's possible that mm is not
+	 * set, if so charge the init_mm (happens for pagecache usage).
+	 */
+	if (!mm)
+		mm = &init_mm;
+
+	mem = rcu_dereference(mm->mem_cgroup);
+	/*
+	 * For every charge from the cgroup, increment reference
+	 * count
+	 */
+	css_get(&mem->css);
+	rcu_read_unlock();
+
+	/*
+	 * If we created the page_cgroup, we should free it on exceeding
+	 * the cgroup limit.
+	 */
+	if (res_counter_charge(&mem->res, 1)) {
+		css_put(&mem->css);
+		goto free_pc;
+	}
+
+	lock_page_cgroup(page);
+	/*
+	 * Check if somebody else beat us to allocating the page_cgroup
+	 */
+	race_pc = page_get_page_cgroup(page);
+	if (race_pc) {
+		kfree(pc);
+		pc = race_pc;
+		atomic_inc(&pc->ref_cnt);
+		res_counter_uncharge(&mem->res, 1);
+		css_put(&mem->css);
+		goto done;
+	}
+
+	atomic_set(&pc->ref_cnt, 1);
+	pc->mem_cgroup = mem;
+	pc->page = page;
+	page_assign_page_cgroup(page, pc);
+
+done:
+	unlock_page_cgroup(page);
+	return 0;
+free_pc:
+	kfree(pc);
+	return -ENOMEM;
+err:
+	unlock_page_cgroup(page);
+	return -ENOMEM;
+}
+
+/*
+ * Uncharging is always a welcome operation, we never complain, simply
+ * uncharge.
+ */
+void mem_cgroup_uncharge(struct page_cgroup *pc)
+{
+	struct mem_cgroup *mem;
+	struct page *page;
+
+	if (!pc)
+		return;
+
+	if (atomic_dec_and_test(&pc->ref_cnt)) {
+		page = pc->page;
+		lock_page_cgroup(page);
+		mem = pc->mem_cgroup;
+		css_put(&mem->css);
+		page_assign_page_cgroup(page, NULL);
+		unlock_page_cgroup(page);
+		res_counter_uncharge(&mem->res, 1);
+		kfree(pc);
+	}
 }
 
 static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
@@ -150,6 +308,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 		return NULL;
 
 	res_counter_init(&mem->res);
+	INIT_LIST_HEAD(&mem->active_list);
+	INIT_LIST_HEAD(&mem->inactive_list);
 	return &mem->css;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 9d073fa0a2d0..0ba224ea6ba4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -50,6 +50,7 @@
 #include <linux/delayacct.h>
 #include <linux/init.h>
 #include <linux/writeback.h>
+#include <linux/memcontrol.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -1144,16 +1145,20 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
 {
 	int retval;
 	pte_t *pte;
-	spinlock_t *ptl;  
+	spinlock_t *ptl;
+
+	retval = mem_cgroup_charge(page, mm);
+	if (retval)
+		goto out;
 
 	retval = -EINVAL;
 	if (PageAnon(page))
-		goto out;
+		goto out_uncharge;
 	retval = -ENOMEM;
 	flush_dcache_page(page);
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
-		goto out;
+		goto out_uncharge;
 	retval = -EBUSY;
 	if (!pte_none(*pte))
 		goto out_unlock;
@@ -1165,8 +1170,12 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
 	retval = 0;
+	pte_unmap_unlock(pte, ptl);
+	return retval;
 out_unlock:
 	pte_unmap_unlock(pte, ptl);
+out_uncharge:
+	mem_cgroup_uncharge_page(page);
 out:
 	return retval;
 }
@@ -1641,6 +1650,9 @@ gotten:
 	cow_user_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
+	if (mem_cgroup_charge(new_page, mm))
+		goto oom_free_new;
+
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
@@ -1672,7 +1684,9 @@ gotten:
 		/* Free the old page.. */
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
-	}
+	} else
+		mem_cgroup_uncharge_page(new_page);
+
 	if (new_page)
 		page_cache_release(new_page);
 	if (old_page)
@@ -1696,6 +1710,8 @@ unlock:
 		put_page(dirty_page);
 	}
 	return ret;
+oom_free_new:
+	__free_page(new_page);
 oom:
 	if (old_page)
 		page_cache_release(old_page);
@@ -2036,6 +2052,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(PGMAJFAULT);
 	}
 
+	if (mem_cgroup_charge(page, mm)) {
+		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	mark_page_accessed(page);
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2073,8 +2095,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (write_access) {
 		/* XXX: We could OR the do_wp_page code with this one? */
 		if (do_wp_page(mm, vma, address,
-				page_table, pmd, ptl, pte) & VM_FAULT_OOM)
+				page_table, pmd, ptl, pte) & VM_FAULT_OOM) {
+			mem_cgroup_uncharge_page(page);
 			ret = VM_FAULT_OOM;
+		}
 		goto out;
 	}
 
@@ -2085,6 +2109,7 @@ unlock:
 out:
 	return ret;
 out_nomap:
+	mem_cgroup_uncharge_page(page);
 	pte_unmap_unlock(page_table, ptl);
 	unlock_page(page);
 	page_cache_release(page);
@@ -2114,6 +2139,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto oom;
 	__SetPageUptodate(page);
 
+	if (mem_cgroup_charge(page, mm))
+		goto oom_free_page;
+
 	entry = mk_pte(page, vma->vm_page_prot);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
@@ -2131,8 +2159,11 @@ unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return 0;
 release:
+	mem_cgroup_uncharge_page(page);
 	page_cache_release(page);
 	goto unlock;
+oom_free_page:
+	__free_page(page);
 oom:
 	return VM_FAULT_OOM;
 }
@@ -2246,6 +2277,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	}
 
+	if (mem_cgroup_charge(page, mm)) {
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 
 	/*
@@ -2281,6 +2317,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* no need to invalidate: a not-present page won't be cached */
 		update_mmu_cache(vma, address, entry);
 	} else {
+		mem_cgroup_uncharge_page(page);
 		if (anon)
 			page_cache_release(page);
 		else
diff --git a/mm/migrate.c b/mm/migrate.c
index 857a987e3690..417bbda14e5b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -29,6 +29,7 @@
 #include <linux/mempolicy.h>
 #include <linux/vmalloc.h>
 #include <linux/security.h>
+#include <linux/memcontrol.h>
 
 #include "internal.h"
 
@@ -152,6 +153,11 @@ static void remove_migration_pte(struct vm_area_struct *vma,
  		return;
  	}
 
+	if (mem_cgroup_charge(new, mm)) {
+		pte_unmap(ptep);
+		return;
+	}
+
  	ptl = pte_lockptr(mm, pmd);
  	spin_lock(ptl);
 	pte = *ptep;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 37576b822f06..26a54a17dc9f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -43,6 +43,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
+#include <linux/memcontrol.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -987,6 +988,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 
 	if (!PageHighMem(page))
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
+	VM_BUG_ON(page_get_page_cgroup(page));
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
@@ -2525,6 +2527,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		set_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
+		page_assign_page_cgroup(page, NULL);
 		SetPageReserved(page);
 
 		/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 57ad276900c9..4a3487921eff 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -48,6 +48,7 @@
 #include <linux/rcupdate.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
+#include <linux/memcontrol.h>
 
 #include <asm/tlbflush.h>
 
@@ -554,8 +555,14 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (atomic_inc_and_test(&page->_mapcount))
 		__page_set_anon_rmap(page, vma, address);
-	else
+	else {
 		__page_check_anon_rmap(page, vma, address);
+		/*
+		 * We unconditionally charged during prepare, we uncharge here
+		 * This takes care of balancing the reference counts
+		 */
+		mem_cgroup_uncharge_page(page);
+	}
 }
 
 /*
@@ -586,6 +593,12 @@ void page_add_file_rmap(struct page *page)
 {
 	if (atomic_inc_and_test(&page->_mapcount))
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
+	else
+		/*
+		 * We unconditionally charged during prepare, we uncharge here
+		 * This takes care of balancing the reference counts
+		 */
+		mem_cgroup_uncharge_page(page);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -646,6 +659,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
+		mem_cgroup_uncharge_page(page);
+
 		__dec_zone_page_state(page,
 				PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 	}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ec42f01a8d02..f96e3ff1e791 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,6 +17,7 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 #include <linux/migrate.h>
+#include <linux/memcontrol.h>
 
 #include <asm/pgtable.h>
 
@@ -76,6 +77,11 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	BUG_ON(PagePrivate(page));
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
+
+		error = mem_cgroup_charge(page, current->mm);
+		if (error)
+			goto out;
+
 		write_lock_irq(&swapper_space.tree_lock);
 		error = radix_tree_insert(&swapper_space.page_tree,
 						entry.val, page);
@@ -86,10 +92,13 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 			total_swapcache_pages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
 			INC_CACHE_INFO(add_total);
+		} else {
+			mem_cgroup_uncharge_page(page);
 		}
 		write_unlock_irq(&swapper_space.tree_lock);
 		radix_tree_preload_end();
 	}
+out:
 	return error;
 }
 
@@ -104,6 +113,7 @@ void __delete_from_swap_cache(struct page *page)
 	BUG_ON(PageWriteback(page));
 	BUG_ON(PagePrivate(page));
 
+	mem_cgroup_uncharge_page(page);
 	radix_tree_delete(&swapper_space.page_tree, page_private(page));
 	set_page_private(page, 0);
 	ClearPageSwapCache(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index afae7b1f680b..fddc4cc4149b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -27,6 +27,7 @@
 #include <linux/mutex.h>
 #include <linux/capability.h>
 #include <linux/syscalls.h>
+#include <linux/memcontrol.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -506,9 +507,12 @@ unsigned int count_swap_pages(int type, int free)
  * just let do_wp_page work it out if a write is requested later - to
  * force COW, vm_page_prot omits write permission from any private vma.
  */
-static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
+	if (mem_cgroup_charge(page, vma->vm_mm))
+		return -ENOMEM;
+
 	inc_mm_counter(vma->vm_mm, anon_rss);
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
@@ -520,6 +524,7 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
 	 * immediately swapped out again after swapon.
 	 */
 	activate_page(page);
+	return 1;
 }
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -529,7 +534,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	pte_t swp_pte = swp_entry_to_pte(entry);
 	pte_t *pte;
 	spinlock_t *ptl;
-	int found = 0;
+	int ret = 0;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	do {
@@ -538,13 +543,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			unuse_pte(vma, pte++, addr, entry, page);
-			found = 1;
+			ret = unuse_pte(vma, pte++, addr, entry, page);
 			break;
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(pte - 1, ptl);
-	return found;
+	return ret;
 }
 
 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
@@ -553,14 +557,16 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	int ret;
 
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		if (unuse_pte_range(vma, pmd, addr, next, entry, page))
-			return 1;
+		ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
+		if (ret)
+			return ret;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
@@ -571,14 +577,16 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 {
 	pud_t *pud;
 	unsigned long next;
+	int ret;
 
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (unuse_pmd_range(vma, pud, addr, next, entry, page))
-			return 1;
+		ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
+		if (ret)
+			return ret;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
@@ -588,6 +596,7 @@ static int unuse_vma(struct vm_area_struct *vma,
 {
 	pgd_t *pgd;
 	unsigned long addr, end, next;
+	int ret;
 
 	if (page->mapping) {
 		addr = page_address_in_vma(page, vma);
@@ -605,8 +614,9 @@ static int unuse_vma(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (unuse_pud_range(vma, pgd, addr, next, entry, page))
-			return 1;
+		ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
+		if (ret)
+			return ret;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
 }
@@ -615,6 +625,7 @@ static int unuse_mm(struct mm_struct *mm,
 				swp_entry_t entry, struct page *page)
 {
 	struct vm_area_struct *vma;
+	int ret = 0;
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		/*
@@ -627,15 +638,11 @@ static int unuse_mm(struct mm_struct *mm,
 		lock_page(page);
 	}
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->anon_vma && unuse_vma(vma, entry, page))
+		if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
 			break;
 	}
 	up_read(&mm->mmap_sem);
-	/*
-	 * Currently unuse_mm cannot fail, but leave error handling
-	 * at call sites for now, since we change it from time to time.
-	 */
-	return 0;
+	return (ret < 0)? ret: 0;
 }
 
 /*
-- 
cgit 


From 66e1707bc34609f626e2e7b4fe7e454c9748bad5 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:13:56 -0800
Subject: Memory controller: add per cgroup LRU and reclaim

Add the page_cgroup to the per cgroup LRU.  The reclaim algorithm has
been modified to make the isolate_lru_pages() as a pluggable component.  The
scan_control data structure now accepts the cgroup on behalf of which
reclaims are carried out.  try_to_free_pages() has been extended to become
cgroup aware.

[akpm@linux-foundation.org: fix warning]
[Lee.Schermerhorn@hp.com: initialize all scan_control's isolate_pages member]
[bunk@kernel.org: make do_try_to_free_pages() static]
[hugh@veritas.com: memcgroup: fix try_to_free order]
[kamezawa.hiroyu@jp.fujitsu.com: this unlock_page_cgroup() is unnecessary]
Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h  |  12 ++++
 include/linux/res_counter.h |  23 +++++++
 include/linux/swap.h        |   3 +
 mm/memcontrol.c             | 148 ++++++++++++++++++++++++++++++++++++++++++--
 mm/swap.c                   |   2 +
 mm/vmscan.c                 | 128 ++++++++++++++++++++++++++++++--------
 6 files changed, 286 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f5b47efab48b..9c3c1c97c197 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,6 +32,13 @@ extern void page_assign_page_cgroup(struct page *page,
 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm);
 extern void mem_cgroup_uncharge(struct page_cgroup *pc);
+extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
+extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
+					struct list_head *dst,
+					unsigned long *scanned, int order,
+					int mode, struct zone *z,
+					struct mem_cgroup *mem_cont,
+					int active);
 
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
@@ -71,6 +78,11 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
 
+static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
+						bool active)
+{
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index eeb3f7749772..5e60a4f34243 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -99,4 +99,27 @@ int res_counter_charge(struct res_counter *counter, unsigned long val);
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
 void res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
+static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
+{
+	if (cnt->usage < cnt->limit)
+		return true;
+
+	return false;
+}
+
+/*
+ * Helper function to detect if the cgroup is within it's limit or
+ * not. It's currently called from cgroup_rss_prepare()
+ */
+static inline bool res_counter_check_under_limit(struct res_counter *cnt)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cnt->lock, flags);
+	ret = res_counter_limit_check_locked(cnt);
+	spin_unlock_irqrestore(&cnt->lock, flags);
+	return ret;
+}
+
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 353153ea0bd5..4d91bc0e0fd5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -5,6 +5,7 @@
 #include <linux/linkage.h>
 #include <linux/mmzone.h>
 #include <linux/list.h>
+#include <linux/memcontrol.h>
 #include <linux/sched.h>
 
 #include <asm/atomic.h>
@@ -182,6 +183,8 @@ extern void swap_setup(void);
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zone **zones, int order,
 					gfp_t gfp_mask);
+extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem);
+extern int __isolate_lru_page(struct page *page, int mode);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b25df2a9d024..9e9ff914c0f1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -22,10 +22,15 @@
 #include <linux/cgroup.h>
 #include <linux/mm.h>
 #include <linux/page-flags.h>
+#include <linux/backing-dev.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
 
 struct cgroup_subsys mem_cgroup_subsys;
+static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
 
 /*
  * The memory controller data structure. The memory controller controls both
@@ -51,6 +56,10 @@ struct mem_cgroup {
 	 */
 	struct list_head active_list;
 	struct list_head inactive_list;
+	/*
+	 * spin_lock to protect the per cgroup LRU
+	 */
+	spinlock_t lru_lock;
 };
 
 /*
@@ -141,6 +150,94 @@ void __always_inline unlock_page_cgroup(struct page *page)
 	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
+void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+{
+	if (active)
+		list_move(&pc->lru, &pc->mem_cgroup->active_list);
+	else
+		list_move(&pc->lru, &pc->mem_cgroup->inactive_list);
+}
+
+/*
+ * This routine assumes that the appropriate zone's lru lock is already held
+ */
+void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+{
+	struct mem_cgroup *mem;
+	if (!pc)
+		return;
+
+	mem = pc->mem_cgroup;
+
+	spin_lock(&mem->lru_lock);
+	__mem_cgroup_move_lists(pc, active);
+	spin_unlock(&mem->lru_lock);
+}
+
+unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
+					struct list_head *dst,
+					unsigned long *scanned, int order,
+					int mode, struct zone *z,
+					struct mem_cgroup *mem_cont,
+					int active)
+{
+	unsigned long nr_taken = 0;
+	struct page *page;
+	unsigned long scan;
+	LIST_HEAD(pc_list);
+	struct list_head *src;
+	struct page_cgroup *pc;
+
+	if (active)
+		src = &mem_cont->active_list;
+	else
+		src = &mem_cont->inactive_list;
+
+	spin_lock(&mem_cont->lru_lock);
+	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
+		pc = list_entry(src->prev, struct page_cgroup, lru);
+		page = pc->page;
+		VM_BUG_ON(!pc);
+
+		if (PageActive(page) && !active) {
+			__mem_cgroup_move_lists(pc, true);
+			scan--;
+			continue;
+		}
+		if (!PageActive(page) && active) {
+			__mem_cgroup_move_lists(pc, false);
+			scan--;
+			continue;
+		}
+
+		/*
+		 * Reclaim, per zone
+		 * TODO: make the active/inactive lists per zone
+		 */
+		if (page_zone(page) != z)
+			continue;
+
+		/*
+		 * Check if the meta page went away from under us
+		 */
+		if (!list_empty(&pc->lru))
+			list_move(&pc->lru, &pc_list);
+		else
+			continue;
+
+		if (__isolate_lru_page(page, mode) == 0) {
+			list_move(&page->lru, dst);
+			nr_taken++;
+		}
+	}
+
+	list_splice(&pc_list, src);
+	spin_unlock(&mem_cont->lru_lock);
+
+	*scanned = scan;
+	return nr_taken;
+}
+
 /*
  * Charge the memory controller for page usage.
  * Return
@@ -151,6 +248,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc, *race_pc;
+	unsigned long flags;
+	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 
 	/*
 	 * Should page_cgroup's go to their own slab?
@@ -159,14 +258,20 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 	 * to see if the cgroup page already has a page_cgroup associated
 	 * with it
 	 */
+retry:
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
 	/*
 	 * The page_cgroup exists and the page has already been accounted
 	 */
 	if (pc) {
-		atomic_inc(&pc->ref_cnt);
-		goto done;
+		if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
+			/* this page is under being uncharged ? */
+			unlock_page_cgroup(page);
+			cpu_relax();
+			goto retry;
+		} else
+			goto done;
 	}
 
 	unlock_page_cgroup(page);
@@ -197,7 +302,32 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 	 * If we created the page_cgroup, we should free it on exceeding
 	 * the cgroup limit.
 	 */
-	if (res_counter_charge(&mem->res, 1)) {
+	while (res_counter_charge(&mem->res, 1)) {
+		if (try_to_free_mem_cgroup_pages(mem))
+			continue;
+
+		/*
+ 		 * try_to_free_mem_cgroup_pages() might not give us a full
+ 		 * picture of reclaim. Some pages are reclaimed and might be
+ 		 * moved to swap cache or just unmapped from the cgroup.
+ 		 * Check the limit again to see if the reclaim reduced the
+ 		 * current usage of the cgroup before giving up
+ 		 */
+		if (res_counter_check_under_limit(&mem->res))
+			continue;
+			/*
+			 * Since we control both RSS and cache, we end up with a
+			 * very interesting scenario where we end up reclaiming
+			 * memory (essentially RSS), since the memory is pushed
+			 * to swap cache, we eventually end up adding those
+			 * pages back to our list. Hence we give ourselves a
+			 * few chances before we fail
+			 */
+		else if (nr_retries--) {
+			congestion_wait(WRITE, HZ/10);
+			continue;
+		}
+
 		css_put(&mem->css);
 		goto free_pc;
 	}
@@ -221,14 +351,16 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
 	pc->page = page;
 	page_assign_page_cgroup(page, pc);
 
+	spin_lock_irqsave(&mem->lru_lock, flags);
+	list_add(&pc->lru, &mem->active_list);
+	spin_unlock_irqrestore(&mem->lru_lock, flags);
+
 done:
 	unlock_page_cgroup(page);
 	return 0;
 free_pc:
 	kfree(pc);
-	return -ENOMEM;
 err:
-	unlock_page_cgroup(page);
 	return -ENOMEM;
 }
 
@@ -240,6 +372,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 {
 	struct mem_cgroup *mem;
 	struct page *page;
+	unsigned long flags;
 
 	if (!pc)
 		return;
@@ -252,6 +385,10 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 		page_assign_page_cgroup(page, NULL);
 		unlock_page_cgroup(page);
 		res_counter_uncharge(&mem->res, 1);
+
+ 		spin_lock_irqsave(&mem->lru_lock, flags);
+ 		list_del_init(&pc->lru);
+ 		spin_unlock_irqrestore(&mem->lru_lock, flags);
 		kfree(pc);
 	}
 }
@@ -310,6 +447,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	res_counter_init(&mem->res);
 	INIT_LIST_HEAD(&mem->active_list);
 	INIT_LIST_HEAD(&mem->inactive_list);
+	spin_lock_init(&mem->lru_lock);
 	return &mem->css;
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 57b7e25a939c..710a20bb9749 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -29,6 +29,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
+#include <linux/memcontrol.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
@@ -175,6 +176,7 @@ void activate_page(struct page *page)
 		SetPageActive(page);
 		add_page_to_active_list(zone, page);
 		__count_vm_event(PGACTIVATE);
+		mem_cgroup_move_lists(page_get_page_cgroup(page), true);
 	}
 	spin_unlock_irq(&zone->lru_lock);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5a9597e3bbc..7408a8a7d882 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/memcontrol.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -68,6 +69,15 @@ struct scan_control {
 	int all_unreclaimable;
 
 	int order;
+
+	/* Which cgroup do we reclaim from */
+	struct mem_cgroup *mem_cgroup;
+
+	/* Pluggable isolate pages callback */
+	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
+			unsigned long *scanned, int order, int mode,
+			struct zone *z, struct mem_cgroup *mem_cont,
+			int active);
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -626,7 +636,7 @@ keep:
  *
  * returns 0 on success, -ve errno on failure.
  */
-static int __isolate_lru_page(struct page *page, int mode)
+int __isolate_lru_page(struct page *page, int mode)
 {
 	int ret = -EINVAL;
 
@@ -760,6 +770,21 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	return nr_taken;
 }
 
+static unsigned long isolate_pages_global(unsigned long nr,
+					struct list_head *dst,
+					unsigned long *scanned, int order,
+					int mode, struct zone *z,
+					struct mem_cgroup *mem_cont,
+					int active)
+{
+	if (active)
+		return isolate_lru_pages(nr, &z->active_list, dst,
+						scanned, order, mode);
+	else
+		return isolate_lru_pages(nr, &z->inactive_list, dst,
+						scanned, order, mode);
+}
+
 /*
  * clear_active_flags() is a helper for shrink_active_list(), clearing
  * any active bits from the pages in the list.
@@ -801,11 +826,11 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_freed;
 		unsigned long nr_active;
 
-		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
-			     &zone->inactive_list,
+		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order,
 			     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
-					     ISOLATE_BOTH : ISOLATE_INACTIVE);
+					     ISOLATE_BOTH : ISOLATE_INACTIVE,
+				zone, sc->mem_cgroup, 0);
 		nr_active = clear_active_flags(&page_list);
 		__count_vm_events(PGDEACTIVATE, nr_active);
 
@@ -1018,8 +1043,9 @@ force_reclaim_mapped:
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
-	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-			    &l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
+	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
+					ISOLATE_ACTIVE, zone,
+					sc->mem_cgroup, 1);
 	zone->pages_scanned += pgscanned;
 	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
@@ -1051,6 +1077,7 @@ force_reclaim_mapped:
 		ClearPageActive(page);
 
 		list_move(&page->lru, &zone->inactive_list);
+		mem_cgroup_move_lists(page_get_page_cgroup(page), false);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
@@ -1079,6 +1106,7 @@ force_reclaim_mapped:
 		SetPageLRU(page);
 		VM_BUG_ON(!PageActive(page));
 		list_move(&page->lru, &zone->active_list);
+		mem_cgroup_move_lists(page_get_page_cgroup(page), true);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
@@ -1206,7 +1234,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
+static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
+					  struct scan_control *sc)
 {
 	int priority;
 	int ret = 0;
@@ -1215,14 +1244,6 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
 	int i;
-	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
-		.may_writepage = !laptop_mode,
-		.swap_cluster_max = SWAP_CLUSTER_MAX,
-		.may_swap = 1,
-		.swappiness = vm_swappiness,
-		.order = order,
-	};
 
 	count_vm_event(ALLOCSTALL);
 
@@ -1237,17 +1258,22 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 	}
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
-		sc.nr_scanned = 0;
+		sc->nr_scanned = 0;
 		if (!priority)
 			disable_swap_token();
-		nr_reclaimed += shrink_zones(priority, zones, &sc);
-		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
+		nr_reclaimed += shrink_zones(priority, zones, sc);
+		/*
+		 * Don't shrink slabs when reclaiming memory from
+		 * over limit cgroups
+		 */
+		if (sc->mem_cgroup == NULL)
+			shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
 		if (reclaim_state) {
 			nr_reclaimed += reclaim_state->reclaimed_slab;
 			reclaim_state->reclaimed_slab = 0;
 		}
-		total_scanned += sc.nr_scanned;
-		if (nr_reclaimed >= sc.swap_cluster_max) {
+		total_scanned += sc->nr_scanned;
+		if (nr_reclaimed >= sc->swap_cluster_max) {
 			ret = 1;
 			goto out;
 		}
@@ -1259,18 +1285,18 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 		 * that's undesirable in laptop mode, where we *want* lumpy
 		 * writeout.  So in laptop mode, write out the whole world.
 		 */
-		if (total_scanned > sc.swap_cluster_max +
-					sc.swap_cluster_max / 2) {
+		if (total_scanned > sc->swap_cluster_max +
+					sc->swap_cluster_max / 2) {
 			wakeup_pdflush(laptop_mode ? 0 : total_scanned);
-			sc.may_writepage = 1;
+			sc->may_writepage = 1;
 		}
 
 		/* Take a nap, wait for some writeback to complete */
-		if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
+		if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
 			congestion_wait(WRITE, HZ/10);
 	}
 	/* top priority shrink_caches still had more to do? don't OOM, then */
-	if (!sc.all_unreclaimable)
+	if (!sc->all_unreclaimable && sc->mem_cgroup == NULL)
 		ret = 1;
 out:
 	/*
@@ -1293,6 +1319,54 @@ out:
 	return ret;
 }
 
+unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
+{
+	struct scan_control sc = {
+		.gfp_mask = gfp_mask,
+		.may_writepage = !laptop_mode,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.may_swap = 1,
+		.swappiness = vm_swappiness,
+		.order = order,
+		.mem_cgroup = NULL,
+		.isolate_pages = isolate_pages_global,
+	};
+
+	return do_try_to_free_pages(zones, gfp_mask, &sc);
+}
+
+#ifdef CONFIG_CGROUP_MEM_CONT
+
+#ifdef CONFIG_HIGHMEM
+#define ZONE_USERPAGES ZONE_HIGHMEM
+#else
+#define ZONE_USERPAGES ZONE_NORMAL
+#endif
+
+unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
+{
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.may_writepage = !laptop_mode,
+		.may_swap = 1,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.swappiness = vm_swappiness,
+		.order = 0,
+		.mem_cgroup = mem_cont,
+		.isolate_pages = mem_cgroup_isolate_pages,
+	};
+	int node;
+	struct zone **zones;
+
+	for_each_online_node(node) {
+		zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones;
+		if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
+			return 1;
+	}
+	return 0;
+}
+#endif
+
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at pages_high.
@@ -1328,6 +1402,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
 		.order = order,
+		.mem_cgroup = NULL,
+		.isolate_pages = isolate_pages_global,
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
@@ -1649,6 +1725,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 		.swap_cluster_max = nr_pages,
 		.may_writepage = 1,
 		.swappiness = vm_swappiness,
+		.isolate_pages = isolate_pages_global,
 	};
 
 	current->reclaim_state = &reclaim_state;
@@ -1834,6 +1911,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 					SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,
 		.swappiness = vm_swappiness,
+		.isolate_pages = isolate_pages_global,
 	};
 	unsigned long slab_reclaimable;
 
-- 
cgit 


From 0eea10301708c64a6b793894c156e21ddd15eb64 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:13:57 -0800
Subject: Memory controller improve user interface

Change the interface to use bytes instead of pages.  Page sizes can vary
across platforms and configurations.  A new strategy routine has been added
to the resource counters infrastructure to format the data as desired.

Suggested by David Rientjes, Andrew Morton and Herbert Poetzl

Tested on a UML setup with the config for memory control enabled.

[kamezawa.hiroyu@jp.fujitsu.com: possible race fix in res_counter]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/controllers/memory.txt | 29 ++++++++++++++++++++++++-----
 include/linux/res_counter.h          | 12 +++++++-----
 kernel/res_counter.c                 | 36 +++++++++++++++++++++++++-----------
 mm/memcontrol.c                      | 35 +++++++++++++++++++++++++----------
 4 files changed, 81 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 7e27baacca7b..61df8f81c803 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -165,11 +165,30 @@ c. Enable CONFIG_CGROUP_MEM_CONT
 
 Since now we're in the 0 cgroup,
 We can alter the memory limit:
-# echo -n 6000 > /cgroups/0/memory.limit
+# echo -n 4M > /cgroups/0/memory.limit_in_bytes
+
+NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
+mega or gigabytes.
+
+# cat /cgroups/0/memory.limit_in_bytes
+4194304 Bytes
+
+NOTE: The interface has now changed to display the usage in bytes
+instead of pages
 
 We can check the usage:
-# cat /cgroups/0/memory.usage
-25
+# cat /cgroups/0/memory.usage_in_bytes
+1216512 Bytes
+
+A successful write to this file does not guarantee a successful set of
+this limit to the value written into the file.  This can be due to a
+number of factors, such as rounding up to page boundaries or the total
+availability of memory on the system.  The user is required to re-read
+this file after a write to guarantee the value committed by the kernel.
+
+# echo -n 1 > memory.limit_in_bytes
+# cat memory.limit_in_bytes
+4096 Bytes
 
 The memory.failcnt field gives the number of times that the cgroup limit was
 exceeded.
@@ -206,8 +225,8 @@ cgroup might have some charge associated with it, even though all
 tasks have migrated away from it. If some pages are still left, after following
 the steps listed in sections 4.1 and 4.2, check the Swap Cache usage in
 /proc/meminfo to see if the Swap Cache usage is showing up in the
-cgroups memory.usage counter. A simple test of swapoff -a and swapon -a
-should free any pending Swap Cache usage.
+cgroups memory.usage_in_bytes counter. A simple test of swapoff -a and
+swapon -a should free any pending Swap Cache usage.
 
 4.4 Choosing what to account  -- Page Cache (unmapped) vs RSS (mapped)?
 
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 5e60a4f34243..61363ce896d5 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -23,15 +23,15 @@ struct res_counter {
 	/*
 	 * the current resource consumption level
 	 */
-	unsigned long usage;
+	unsigned long long usage;
 	/*
 	 * the limit that usage cannot exceed
 	 */
-	unsigned long limit;
+	unsigned long long limit;
 	/*
 	 * the number of unsuccessful attempts to consume the resource
 	 */
-	unsigned long failcnt;
+	unsigned long long failcnt;
 	/*
 	 * the lock to protect all of the above.
 	 * the routines below consider this to be IRQ-safe
@@ -52,9 +52,11 @@ struct res_counter {
  */
 
 ssize_t res_counter_read(struct res_counter *counter, int member,
-		const char __user *buf, size_t nbytes, loff_t *pos);
+		const char __user *buf, size_t nbytes, loff_t *pos,
+		int (*read_strategy)(unsigned long long val, char *s));
 ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *buf, size_t nbytes, loff_t *pos);
+		const char __user *buf, size_t nbytes, loff_t *pos,
+		int (*write_strategy)(char *buf, unsigned long long *val));
 
 /*
  * the field descriptors. one for each member of res_counter
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 722c484b068b..16cbec2d5d60 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -16,7 +16,7 @@
 void res_counter_init(struct res_counter *counter)
 {
 	spin_lock_init(&counter->lock);
-	counter->limit = (unsigned long)LONG_MAX;
+	counter->limit = (unsigned long long)LLONG_MAX;
 }
 
 int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -59,8 +59,8 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 }
 
 
-static inline unsigned long *res_counter_member(struct res_counter *counter,
-						int member)
+static inline unsigned long long *
+res_counter_member(struct res_counter *counter, int member)
 {
 	switch (member) {
 	case RES_USAGE:
@@ -76,24 +76,30 @@ static inline unsigned long *res_counter_member(struct res_counter *counter,
 }
 
 ssize_t res_counter_read(struct res_counter *counter, int member,
-		const char __user *userbuf, size_t nbytes, loff_t *pos)
+		const char __user *userbuf, size_t nbytes, loff_t *pos,
+		int (*read_strategy)(unsigned long long val, char *st_buf))
 {
-	unsigned long *val;
+	unsigned long long *val;
 	char buf[64], *s;
 
 	s = buf;
 	val = res_counter_member(counter, member);
-	s += sprintf(s, "%lu\n", *val);
+	if (read_strategy)
+		s += read_strategy(*val, s);
+	else
+		s += sprintf(s, "%llu\n", *val);
 	return simple_read_from_buffer((void __user *)userbuf, nbytes,
 			pos, buf, s - buf);
 }
 
 ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *userbuf, size_t nbytes, loff_t *pos)
+		const char __user *userbuf, size_t nbytes, loff_t *pos,
+		int (*write_strategy)(char *st_buf, unsigned long long *val))
 {
 	int ret;
 	char *buf, *end;
-	unsigned long tmp, *val;
+	unsigned long flags;
+	unsigned long long tmp, *val;
 
 	buf = kmalloc(nbytes + 1, GFP_KERNEL);
 	ret = -ENOMEM;
@@ -106,12 +112,20 @@ ssize_t res_counter_write(struct res_counter *counter, int member,
 		goto out_free;
 
 	ret = -EINVAL;
-	tmp = simple_strtoul(buf, &end, 10);
-	if (*end != '\0')
-		goto out_free;
 
+	if (write_strategy) {
+		if (write_strategy(buf, &tmp)) {
+			goto out_free;
+		}
+	} else {
+		tmp = simple_strtoull(buf, &end, 10);
+		if (*end != '\0')
+			goto out_free;
+	}
+	spin_lock_irqsave(&counter->lock, flags);
 	val = res_counter_member(counter, member);
 	*val = tmp;
+	spin_unlock_irqrestore(&counter->lock, flags);
 	ret = nbytes;
 out_free:
 	kfree(buf);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9e9ff914c0f1..d73692279ab1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -302,7 +302,7 @@ retry:
 	 * If we created the page_cgroup, we should free it on exceeding
 	 * the cgroup limit.
 	 */
-	while (res_counter_charge(&mem->res, 1)) {
+	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (try_to_free_mem_cgroup_pages(mem))
 			continue;
 
@@ -341,7 +341,7 @@ retry:
 		kfree(pc);
 		pc = race_pc;
 		atomic_inc(&pc->ref_cnt);
-		res_counter_uncharge(&mem->res, 1);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		goto done;
 	}
@@ -384,7 +384,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 		css_put(&mem->css);
 		page_assign_page_cgroup(page, NULL);
 		unlock_page_cgroup(page);
-		res_counter_uncharge(&mem->res, 1);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 
  		spin_lock_irqsave(&mem->lru_lock, flags);
  		list_del_init(&pc->lru);
@@ -393,12 +393,26 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 	}
 }
 
-static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
-			struct file *file, char __user *userbuf, size_t nbytes,
-			loff_t *ppos)
+int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
+{
+	*tmp = memparse(buf, &buf);
+	if (*buf != '\0')
+		return -EINVAL;
+
+	/*
+	 * Round up the value to the closest page size
+	 */
+	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
+	return 0;
+}
+
+static ssize_t mem_cgroup_read(struct cgroup *cont,
+			struct cftype *cft, struct file *file,
+			char __user *userbuf, size_t nbytes, loff_t *ppos)
 {
 	return res_counter_read(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos);
+				cft->private, userbuf, nbytes, ppos,
+				NULL);
 }
 
 static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
@@ -406,17 +420,18 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 				size_t nbytes, loff_t *ppos)
 {
 	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos);
+				cft->private, userbuf, nbytes, ppos,
+				mem_cgroup_write_strategy);
 }
 
 static struct cftype mem_cgroup_files[] = {
 	{
-		.name = "usage",
+		.name = "usage_in_bytes",
 		.private = RES_USAGE,
 		.read = mem_cgroup_read,
 	},
 	{
-		.name = "limit",
+		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
 		.write = mem_cgroup_write,
 		.read = mem_cgroup_read,
-- 
cgit 


From c7ba5c9e8176704bfac0729875fa62798037584d Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@openvz.org>
Date: Thu, 7 Feb 2008 00:13:58 -0800
Subject: Memory controller: OOM handling

Out of memory handling for cgroups over their limit. A task from the
cgroup over limit is chosen using the existing OOM logic and killed.

TODO:
1. As discussed in the OLS BOF session, consider implementing a user
space policy for OOM handling.

[akpm@linux-foundation.org: fix build due to oom-killer changes]
Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  1 +
 mm/memcontrol.c            |  1 +
 mm/oom_kill.c              | 43 +++++++++++++++++++++++++++++++++++++++----
 3 files changed, 41 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9c3c1c97c197..9bbbf524ba8f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -39,6 +39,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					int mode, struct zone *z,
 					struct mem_cgroup *mem_cont,
 					int active);
+extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d73692279ab1..5260658c90aa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -329,6 +329,7 @@ retry:
 		}
 
 		css_put(&mem->css);
+		mem_cgroup_out_of_memory(mem, GFP_KERNEL);
 		goto free_pc;
 	}
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c1850bf991cd..64751dc9d997 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -25,6 +25,7 @@
 #include <linux/cpuset.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
+#include <linux/memcontrol.h>
 
 int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
@@ -50,7 +51,8 @@ static DEFINE_SPINLOCK(zone_scan_mutex);
  *    of least surprise ... (be careful when you change it)
  */
 
-unsigned long badness(struct task_struct *p, unsigned long uptime)
+unsigned long badness(struct task_struct *p, unsigned long uptime,
+			struct mem_cgroup *mem)
 {
 	unsigned long points, cpu_time, run_time, s;
 	struct mm_struct *mm;
@@ -63,6 +65,13 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 		return 0;
 	}
 
+#ifdef CONFIG_CGROUP_MEM_CONT
+	if (mem != NULL && mm->mem_cgroup != mem) {
+		task_unlock(p);
+		return 0;
+	}
+#endif
+
 	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
@@ -193,7 +202,8 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
  *
  * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct *select_bad_process(unsigned long *ppoints)
+static struct task_struct *select_bad_process(unsigned long *ppoints,
+						struct mem_cgroup *mem)
 {
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
@@ -247,7 +257,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
 		if (p->oomkilladj == OOM_DISABLE)
 			continue;
 
-		points = badness(p, uptime.tv_sec);
+		points = badness(p, uptime.tv_sec, mem);
 		if (points > *ppoints || !chosen) {
 			chosen = p;
 			*ppoints = points;
@@ -368,6 +378,31 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	return oom_kill_task(p);
 }
 
+#ifdef CONFIG_CGROUP_MEM_CONT
+void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
+{
+	unsigned long points = 0;
+	struct task_struct *p;
+
+	cgroup_lock();
+	rcu_read_lock();
+retry:
+	p = select_bad_process(&points, mem);
+	if (PTR_ERR(p) == -1UL)
+		goto out;
+
+	if (!p)
+		p = current;
+
+	if (oom_kill_process(p, gfp_mask, 0, points,
+				"Memory cgroup out of memory"))
+		goto retry;
+out:
+	rcu_read_unlock();
+	cgroup_unlock();
+}
+#endif
+
 static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
 
 int register_oom_notifier(struct notifier_block *nb)
@@ -484,7 +519,7 @@ retry:
 		 * Rambo mode: Shoot down a process and hope it solves whatever
 		 * issues we may have.
 		 */
-		p = select_bad_process(&points);
+		p = select_bad_process(&points, NULL);
 
 		if (PTR_ERR(p) == -1UL)
 			goto out;
-- 
cgit 


From 8697d33194faae6fdd6b2e799f6308aa00cfdf67 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:13:59 -0800
Subject: Memory controller: add switch to control what type of pages to limit

Choose if we want cached pages to be accounted or not.  By default both are
accounted for.  A new set of tunables are added.

echo -n 1 > mem_control_type

switches the accounting to account for only mapped pages

echo -n 3 > mem_control_type

switches the behaviour back

[bunk@kernel.org: mm/memcontrol.c: clenups]
[akpm@linux-foundation.org: fix sparc32 build]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  9 +++++
 mm/filemap.c               |  2 +-
 mm/memcontrol.c            | 98 ++++++++++++++++++++++++++++++++++++++++++++--
 mm/swap_state.c            |  2 +-
 4 files changed, 106 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9bbbf524ba8f..bb6f5105401b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -22,6 +22,8 @@
 
 struct mem_cgroup;
 struct page_cgroup;
+struct page;
+struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_CONT
 
@@ -40,6 +42,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
+extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm);
 
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
@@ -84,6 +87,12 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
 {
 }
 
+static inline int mem_cgroup_cache_charge(struct page *page,
+						struct mm_struct *mm)
+{
+	return 0;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index b7a01e927953..8ae171cc2811 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 
 	if (error == 0) {
 
-		error = mem_cgroup_charge(page, current->mm);
+		error = mem_cgroup_cache_charge(page, current->mm);
 		if (error)
 			goto out;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5260658c90aa..10833d969e3f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -29,6 +29,8 @@
 #include <linux/spinlock.h>
 #include <linux/fs.h>
 
+#include <asm/uaccess.h>
+
 struct cgroup_subsys mem_cgroup_subsys;
 static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
 
@@ -60,6 +62,7 @@ struct mem_cgroup {
 	 * spin_lock to protect the per cgroup LRU
 	 */
 	spinlock_t lru_lock;
+	unsigned long control_type;	/* control RSS or RSS+Pagecache */
 };
 
 /*
@@ -82,6 +85,15 @@ struct page_cgroup {
 					/* mapped and cached states     */
 };
 
+enum {
+	MEM_CGROUP_TYPE_UNSPEC = 0,
+	MEM_CGROUP_TYPE_MAPPED,
+	MEM_CGROUP_TYPE_CACHED,
+	MEM_CGROUP_TYPE_ALL,
+	MEM_CGROUP_TYPE_MAX,
+};
+
+static struct mem_cgroup init_mem_cgroup;
 
 static inline
 struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
@@ -139,18 +151,18 @@ struct page_cgroup *page_get_page_cgroup(struct page *page)
 		(page->page_cgroup & ~PAGE_CGROUP_LOCK);
 }
 
-void __always_inline lock_page_cgroup(struct page *page)
+static void __always_inline lock_page_cgroup(struct page *page)
 {
 	bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 	VM_BUG_ON(!page_cgroup_locked(page));
 }
 
-void __always_inline unlock_page_cgroup(struct page *page)
+static void __always_inline unlock_page_cgroup(struct page *page)
 {
 	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
-void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	if (active)
 		list_move(&pc->lru, &pc->mem_cgroup->active_list);
@@ -365,6 +377,22 @@ err:
 	return -ENOMEM;
 }
 
+/*
+ * See if the cached pages should be charged at all?
+ */
+int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm)
+{
+	struct mem_cgroup *mem;
+	if (!mm)
+		mm = &init_mm;
+
+	mem = rcu_dereference(mm->mem_cgroup);
+	if (mem->control_type == MEM_CGROUP_TYPE_ALL)
+		return mem_cgroup_charge(page, mm);
+	else
+		return 0;
+}
+
 /*
  * Uncharging is always a welcome operation, we never complain, simply
  * uncharge.
@@ -375,6 +403,10 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 	struct page *page;
 	unsigned long flags;
 
+	/*
+	 * This can handle cases when a page is not charged at all and we
+	 * are switching between handling the control_type.
+	 */
 	if (!pc)
 		return;
 
@@ -425,6 +457,60 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 				mem_cgroup_write_strategy);
 }
 
+static ssize_t mem_control_type_write(struct cgroup *cont,
+			struct cftype *cft, struct file *file,
+			const char __user *userbuf,
+			size_t nbytes, loff_t *pos)
+{
+	int ret;
+	char *buf, *end;
+	unsigned long tmp;
+	struct mem_cgroup *mem;
+
+	mem = mem_cgroup_from_cont(cont);
+	buf = kmalloc(nbytes + 1, GFP_KERNEL);
+	ret = -ENOMEM;
+	if (buf == NULL)
+		goto out;
+
+	buf[nbytes] = 0;
+	ret = -EFAULT;
+	if (copy_from_user(buf, userbuf, nbytes))
+		goto out_free;
+
+	ret = -EINVAL;
+	tmp = simple_strtoul(buf, &end, 10);
+	if (*end != '\0')
+		goto out_free;
+
+	if (tmp <= MEM_CGROUP_TYPE_UNSPEC || tmp >= MEM_CGROUP_TYPE_MAX)
+		goto out_free;
+
+	mem->control_type = tmp;
+	ret = nbytes;
+out_free:
+	kfree(buf);
+out:
+	return ret;
+}
+
+static ssize_t mem_control_type_read(struct cgroup *cont,
+				struct cftype *cft,
+				struct file *file, char __user *userbuf,
+				size_t nbytes, loff_t *ppos)
+{
+	unsigned long val;
+	char buf[64], *s;
+	struct mem_cgroup *mem;
+
+	mem = mem_cgroup_from_cont(cont);
+	s = buf;
+	val = mem->control_type;
+	s += sprintf(s, "%lu\n", val);
+	return simple_read_from_buffer((void __user *)userbuf, nbytes,
+			ppos, buf, s - buf);
+}
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -442,6 +528,11 @@ static struct cftype mem_cgroup_files[] = {
 		.private = RES_FAILCNT,
 		.read = mem_cgroup_read,
 	},
+	{
+		.name = "control_type",
+		.write = mem_control_type_write,
+		.read = mem_control_type_read,
+	},
 };
 
 static struct mem_cgroup init_mem_cgroup;
@@ -464,6 +555,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	INIT_LIST_HEAD(&mem->active_list);
 	INIT_LIST_HEAD(&mem->inactive_list);
 	spin_lock_init(&mem->lru_lock);
+	mem->control_type = MEM_CGROUP_TYPE_ALL;
 	return &mem->css;
 }
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f96e3ff1e791..88258869c8e7 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
 
-		error = mem_cgroup_charge(page, current->mm);
+		error = mem_cgroup_cache_charge(page, current->mm);
 		if (error)
 			goto out;
 
-- 
cgit 


From bed7161a519a2faef53e1bce1b47595e297c1d14 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:14:01 -0800
Subject: Memory controller: make page_referenced() cgroup aware

Make page_referenced() cgroup aware.  Without this patch, page_referenced()
can cause a page to be skipped while reclaiming pages.  This patch ensures
that other cgroups do not hold pages in a particular cgroup hostage.  It
is required to ensure that shared pages are freed from a cgroup when they
are not actively referenced from the cgroup that brought them in

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 ++++++
 include/linux/rmap.h       |  5 +++--
 mm/memcontrol.c            |  5 +++++
 mm/rmap.c                  | 30 ++++++++++++++++++++++++------
 mm/vmscan.c                |  4 ++--
 5 files changed, 40 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bb6f5105401b..9d0a830423b6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -43,6 +43,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					int active);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm);
+extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm);
 
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
@@ -93,6 +94,11 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
+static inline struct mem_cgroup *mm_cgroup(struct mm_struct *mm)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 97347f22fc20..1383692ac5bd 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -8,6 +8,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/spinlock.h>
+#include <linux/memcontrol.h>
 
 /*
  * The anon_vma heads a list of private "related" vmas, to scan if
@@ -86,7 +87,7 @@ static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma,
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked);
+int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt);
 int try_to_unmap(struct page *, int ignore_refs);
 
 /*
@@ -114,7 +115,7 @@ int page_mkclean(struct page *);
 #define anon_vma_prepare(vma)	(0)
 #define anon_vma_link(vma)	do {} while (0)
 
-#define page_referenced(page,l) TestClearPageReferenced(page)
+#define page_referenced(page,l,cnt) TestClearPageReferenced(page)
 #define try_to_unmap(page, refs) SWAP_FAIL
 
 static inline int page_mkclean(struct page *page)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 10833d969e3f..ff7cac602984 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -110,6 +110,11 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 				struct mem_cgroup, css);
 }
 
+inline struct mem_cgroup *mm_cgroup(struct mm_struct *mm)
+{
+	return rcu_dereference(mm->mem_cgroup);
+}
+
 void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
 {
 	struct mem_cgroup *mem;
diff --git a/mm/rmap.c b/mm/rmap.c
index 4a3487921eff..a0e92a263d12 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -302,7 +302,8 @@ out:
 	return referenced;
 }
 
-static int page_referenced_anon(struct page *page)
+static int page_referenced_anon(struct page *page,
+				struct mem_cgroup *mem_cont)
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
@@ -315,6 +316,13 @@ static int page_referenced_anon(struct page *page)
 
 	mapcount = page_mapcount(page);
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		/*
+		 * If we are reclaiming on behalf of a cgroup, skip
+		 * counting on behalf of references from different
+		 * cgroups
+		 */
+		if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont))
+			continue;
 		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
@@ -335,7 +343,8 @@ static int page_referenced_anon(struct page *page)
  *
  * This function is only called from page_referenced for object-based pages.
  */
-static int page_referenced_file(struct page *page)
+static int page_referenced_file(struct page *page,
+				struct mem_cgroup *mem_cont)
 {
 	unsigned int mapcount;
 	struct address_space *mapping = page->mapping;
@@ -368,6 +377,13 @@ static int page_referenced_file(struct page *page)
 	mapcount = page_mapcount(page);
 
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		/*
+		 * If we are reclaiming on behalf of a cgroup, skip
+		 * counting on behalf of references from different
+		 * cgroups
+		 */
+		if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont))
+			continue;
 		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
 				  == (VM_LOCKED|VM_MAYSHARE)) {
 			referenced++;
@@ -390,7 +406,8 @@ static int page_referenced_file(struct page *page)
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page, int is_locked)
+int page_referenced(struct page *page, int is_locked,
+			struct mem_cgroup *mem_cont)
 {
 	int referenced = 0;
 
@@ -402,14 +419,15 @@ int page_referenced(struct page *page, int is_locked)
 
 	if (page_mapped(page) && page->mapping) {
 		if (PageAnon(page))
-			referenced += page_referenced_anon(page);
+			referenced += page_referenced_anon(page, mem_cont);
 		else if (is_locked)
-			referenced += page_referenced_file(page);
+			referenced += page_referenced_file(page, mem_cont);
 		else if (TestSetPageLocked(page))
 			referenced++;
 		else {
 			if (page->mapping)
-				referenced += page_referenced_file(page);
+				referenced +=
+					page_referenced_file(page, mem_cont);
 			unlock_page(page);
 		}
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7408a8a7d882..215f6a726b2f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -503,7 +503,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1);
+		referenced = page_referenced(page, 1, sc->mem_cgroup);
 		/* In active use or really unfreeable?  Activate it. */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
 					referenced && page_mapping_inuse(page))
@@ -1057,7 +1057,7 @@ force_reclaim_mapped:
 		if (page_mapped(page)) {
 			if (!reclaim_mapped ||
 			    (total_swap_pages == 0 && PageAnon(page)) ||
-			    page_referenced(page, 0)) {
+			    page_referenced(page, 0, sc->mem_cgroup)) {
 				list_add(&page->lru, &l_active);
 				continue;
 			}
-- 
cgit 


From e1a1cd590e3fcb0d2e230128daf2337ea55387dc Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:14:02 -0800
Subject: Memory controller: make charging gfp mask aware

Nick Piggin pointed out that swap cache and page cache addition routines
could be called from non GFP_KERNEL contexts.  This patch makes the
charging routine aware of the gfp context.  Charging might fail if the
cgroup is over it's limit, in which case a suitable error is returned.

This patch was tested on a Powerpc box.  I am still looking at being able
to test the path, through which allocations happen in non GFP_KERNEL
contexts.

[kamezawa.hiroyu@jp.fujitsu.com: problem with ZONE_MOVABLE]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 12 ++++++++----
 include/linux/swap.h       |  3 ++-
 mm/filemap.c               |  2 +-
 mm/memcontrol.c            | 24 +++++++++++++++++-------
 mm/memory.c                | 10 +++++-----
 mm/migrate.c               |  2 +-
 mm/swap_state.c            |  2 +-
 mm/swapfile.c              |  2 +-
 mm/vmscan.c                | 14 +++++---------
 9 files changed, 41 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9d0a830423b6..cc0ad7191acd 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,7 +32,8 @@ extern void mm_free_cgroup(struct mm_struct *mm);
 extern void page_assign_page_cgroup(struct page *page,
 					struct page_cgroup *pc);
 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
-extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm);
+extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+				gfp_t gfp_mask);
 extern void mem_cgroup_uncharge(struct page_cgroup *pc);
 extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -42,7 +43,8 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
-extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm);
+extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+					gfp_t gfp_mask);
 extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm);
 
 static inline void mem_cgroup_uncharge_page(struct page *page)
@@ -70,7 +72,8 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
 	return NULL;
 }
 
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
+static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+					gfp_t gfp_mask)
 {
 	return 0;
 }
@@ -89,7 +92,8 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
 }
 
 static inline int mem_cgroup_cache_charge(struct page *page,
-						struct mm_struct *mm)
+						struct mm_struct *mm,
+						gfp_t gfp_mask)
 {
 	return 0;
 }
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4d91bc0e0fd5..3ca5c4bd6d3f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -183,7 +183,8 @@ extern void swap_setup(void);
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zone **zones, int order,
 					gfp_t gfp_mask);
-extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem);
+extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
+							gfp_t gfp_mask);
 extern int __isolate_lru_page(struct page *page, int mode);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
diff --git a/mm/filemap.c b/mm/filemap.c
index 8ae171cc2811..63040d5e0ae2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 
 	if (error == 0) {
 
-		error = mem_cgroup_cache_charge(page, current->mm);
+		error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
 		if (error)
 			goto out;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ff7cac602984..ac8774426fec 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * 0 if the charge was successful
  * < 0 if the cgroup is over its limit
  */
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+				gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc, *race_pc;
@@ -293,7 +294,7 @@ retry:
 
 	unlock_page_cgroup(page);
 
-	pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL);
+	pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
 	if (pc == NULL)
 		goto err;
 
@@ -320,7 +321,14 @@ retry:
 	 * the cgroup limit.
 	 */
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
-		if (try_to_free_mem_cgroup_pages(mem))
+		bool is_atomic = gfp_mask & GFP_ATOMIC;
+		/*
+		 * We cannot reclaim under GFP_ATOMIC, fail the charge
+		 */
+		if (is_atomic)
+			goto noreclaim;
+
+		if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
 			continue;
 
 		/*
@@ -344,9 +352,10 @@ retry:
 			congestion_wait(WRITE, HZ/10);
 			continue;
 		}
-
+noreclaim:
 		css_put(&mem->css);
-		mem_cgroup_out_of_memory(mem, GFP_KERNEL);
+		if (!is_atomic)
+			mem_cgroup_out_of_memory(mem, GFP_KERNEL);
 		goto free_pc;
 	}
 
@@ -385,7 +394,8 @@ err:
 /*
  * See if the cached pages should be charged at all?
  */
-int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm)
+int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+				gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem;
 	if (!mm)
@@ -393,7 +403,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm)
 
 	mem = rcu_dereference(mm->mem_cgroup);
 	if (mem->control_type == MEM_CGROUP_TYPE_ALL)
-		return mem_cgroup_charge(page, mm);
+		return mem_cgroup_charge(page, mm, gfp_mask);
 	else
 		return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 0ba224ea6ba4..153a54b2013c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	retval = mem_cgroup_charge(page, mm);
+	retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
 	if (retval)
 		goto out;
 
@@ -1650,7 +1650,7 @@ gotten:
 	cow_user_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
-	if (mem_cgroup_charge(new_page, mm))
+	if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
 		goto oom_free_new;
 
 	/*
@@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(PGMAJFAULT);
 	}
 
-	if (mem_cgroup_charge(page, mm)) {
+	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
 		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 		ret = VM_FAULT_OOM;
 		goto out;
@@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto oom;
 	__SetPageUptodate(page);
 
-	if (mem_cgroup_charge(page, mm))
+	if (mem_cgroup_charge(page, mm, GFP_KERNEL))
 		goto oom_free_page;
 
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	}
 
-	if (mem_cgroup_charge(page, mm)) {
+	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
 		ret = VM_FAULT_OOM;
 		goto out;
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 417bbda14e5b..763794144697 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -153,7 +153,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
  		return;
  	}
 
-	if (mem_cgroup_charge(new, mm)) {
+	if (mem_cgroup_charge(new, mm, GFP_KERNEL)) {
 		pte_unmap(ptep);
 		return;
 	}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 88258869c8e7..581b609e748d 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
 
-		error = mem_cgroup_cache_charge(page, current->mm);
+		error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
 		if (error)
 			goto out;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fddc4cc4149b..35e00c3d0286 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
-	if (mem_cgroup_charge(page, vma->vm_mm))
+	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
 		return -ENOMEM;
 
 	inc_mm_counter(vma->vm_mm, anon_rss);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 215f6a726b2f..b7d868cbca09 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 
 #ifdef CONFIG_CGROUP_MEM_CONT
 
-#ifdef CONFIG_HIGHMEM
-#define ZONE_USERPAGES ZONE_HIGHMEM
-#else
-#define ZONE_USERPAGES ZONE_NORMAL
-#endif
-
-unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
+unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
+						gfp_t gfp_mask)
 {
 	struct scan_control sc = {
-		.gfp_mask = GFP_KERNEL,
+		.gfp_mask = gfp_mask,
 		.may_writepage = !laptop_mode,
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
@@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
 	};
 	int node;
 	struct zone **zones;
+	int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
 
 	for_each_online_node(node) {
-		zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones;
+		zones = NODE_DATA(node)->node_zonelists[target_zone].zones;
 		if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
 			return 1;
 	}
-- 
cgit 


From 3062fc67dad01b1d2a15d58c709eff946389eca4 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 7 Feb 2008 00:14:03 -0800
Subject: memcontrol: move mm_cgroup to header file

Inline functions must preceed their use, so mm_cgroup() should be defined
in linux/memcontrol.h.

include/linux/memcontrol.h:48: warning: 'mm_cgroup' declared inline after
	being called
include/linux/memcontrol.h:48: warning: previous declaration of
	'mm_cgroup' was here

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: nuther build fix]
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 11 +++++++++--
 mm/memcontrol.c            |  5 -----
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index cc0ad7191acd..4f580268b1b7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -20,6 +20,9 @@
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
 
+#include <linux/rcupdate.h>
+#include <linux/mm.h>
+
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
@@ -45,7 +48,11 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
-extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm);
+
+static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
+{
+	return rcu_dereference(mm->mem_cgroup);
+}
 
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
@@ -98,7 +105,7 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
-static inline struct mem_cgroup *mm_cgroup(struct mm_struct *mm)
+static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
 {
 	return NULL;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ac8774426fec..f6cdbf755ed3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -110,11 +110,6 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 				struct mem_cgroup, css);
 }
 
-inline struct mem_cgroup *mm_cgroup(struct mm_struct *mm)
-{
-	return rcu_dereference(mm->mem_cgroup);
-}
-
 void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
 {
 	struct mem_cgroup *mem;
-- 
cgit 


From 4c4a22148909e4c003562ea7ffe0a06e26919e3c Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 7 Feb 2008 00:14:06 -0800
Subject: memcontrol: move oom task exclusion to tasklist scan

Creates a helper function to return non-zero if a task is a member of a
memory controller:

	int task_in_mem_cgroup(const struct task_struct *task,
			       const struct mem_cgroup *mem);

When the OOM killer is constrained by the memory controller, the exclusion
of tasks that are not a member of that controller was previously misplaced
and appeared in the badness scoring function.  It should be excluded
during the tasklist scan in select_bad_process() instead.

[akpm@linux-foundation.org: build fix]
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  7 +++++++
 mm/memcontrol.c            | 10 ++++++++++
 mm/oom_kill.c              |  9 ++-------
 3 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4f580268b1b7..42536c737d9c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -48,6 +48,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
+int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
 static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
 {
@@ -110,6 +111,12 @@ static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
 	return NULL;
 }
 
+static inline int task_in_mem_cgroup(struct task_struct *task,
+				     const struct mem_cgroup *mem)
+{
+	return 1;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6cdbf755ed3..2fadd4896a14 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -170,6 +170,16 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 		list_move(&pc->lru, &pc->mem_cgroup->inactive_list);
 }
 
+int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
+{
+	int ret;
+
+	task_lock(task);
+	ret = task->mm && mm_cgroup(task->mm) == mem;
+	task_unlock(task);
+	return ret;
+}
+
 /*
  * This routine assumes that the appropriate zone's lru lock is already held
  */
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64751dc9d997..ef5084dbc793 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -65,13 +65,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime,
 		return 0;
 	}
 
-#ifdef CONFIG_CGROUP_MEM_CONT
-	if (mem != NULL && mm->mem_cgroup != mem) {
-		task_unlock(p);
-		return 0;
-	}
-#endif
-
 	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
@@ -223,6 +216,8 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 		/* skip the init task */
 		if (is_global_init(p))
 			continue;
+		if (mem && !task_in_mem_cgroup(p, mem))
+			continue;
 
 		/*
 		 * This task already has access to memory reserves and is
-- 
cgit 


From ae41be374293e70e1ed441d986afcc6e744ef9d9 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 7 Feb 2008 00:14:10 -0800
Subject: bugfix for memory cgroup controller: migration under memory
 controller fix

While using memory control cgroup, page-migration under it works as following.
==
 1. uncharge all refs at try to unmap.
 2. charge regs again remove_migration_ptes()
==
This is simple but has following problems.
==
 The page is uncharged and charged back again if *mapped*.
    - This means that cgroup before migration can be different from one after
      migration
    - If page is not mapped but charged as page cache, charge is just ignored
      (because not mapped, it will not be uncharged before migration)
      This is memory leak.
==
This patch tries to keep memory cgroup at page migration by increasing
one refcnt during it. 3 functions are added.

 mem_cgroup_prepare_migration() --- increase refcnt of page->page_cgroup
 mem_cgroup_end_migration()     --- decrease refcnt of page->page_cgroup
 mem_cgroup_page_migration() --- copy page->page_cgroup from old page to
                                 new page.

During migration
  - old page is under PG_locked.
  - new page is under PG_locked, too.
  - both old page and new page is not on LRU.

These 3 facts guarantee that page_cgroup() migration has no race.

Tested and worked well in x86_64/fake-NUMA box.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 19 +++++++++++++++++++
 mm/memcontrol.c            | 43 +++++++++++++++++++++++++++++++++++++++++++
 mm/migrate.c               | 13 ++++++++++---
 3 files changed, 72 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 42536c737d9c..4ec712967f7c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -60,6 +60,10 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 	mem_cgroup_uncharge(page_get_page_cgroup(page));
 }
 
+extern int mem_cgroup_prepare_migration(struct page *page);
+extern void mem_cgroup_end_migration(struct page *page);
+extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
+
 #else /* CONFIG_CGROUP_MEM_CONT */
 static inline void mm_init_cgroup(struct mm_struct *mm,
 					struct task_struct *p)
@@ -117,6 +121,21 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 	return 1;
 }
 
+static inline int mem_cgroup_prepare_migration(struct page *page)
+{
+	return 0;
+}
+
+static inline void mem_cgroup_end_migration(struct page *page)
+{
+}
+
+static inline void
+mem_cgroup_page_migration(struct page *page, struct page *newpage)
+{
+}
+
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3270ce7375db..128f45c16fa6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -492,6 +492,49 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 		}
 	}
 }
+/*
+ * Returns non-zero if a page (under migration) has valid page_cgroup member.
+ * Refcnt of page_cgroup is incremented.
+ */
+
+int mem_cgroup_prepare_migration(struct page *page)
+{
+	struct page_cgroup *pc;
+	int ret = 0;
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	if (pc && atomic_inc_not_zero(&pc->ref_cnt))
+		ret = 1;
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+void mem_cgroup_end_migration(struct page *page)
+{
+	struct page_cgroup *pc = page_get_page_cgroup(page);
+	mem_cgroup_uncharge(pc);
+}
+/*
+ * We know both *page* and *newpage* are now not-on-LRU and Pg_locked.
+ * And no race with uncharge() routines because page_cgroup for *page*
+ * has extra one reference by mem_cgroup_prepare_migration.
+ */
+
+void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+{
+	struct page_cgroup *pc;
+retry:
+	pc = page_get_page_cgroup(page);
+	if (!pc)
+		return;
+	if (clear_page_cgroup(page, pc) != pc)
+		goto retry;
+	pc->page = newpage;
+	lock_page_cgroup(newpage);
+	page_assign_page_cgroup(newpage, pc);
+	unlock_page_cgroup(newpage);
+	return;
+}
 
 int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
 {
diff --git a/mm/migrate.c b/mm/migrate.c
index 763794144697..a73504ff5ab9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -593,9 +593,10 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	else
 		rc = fallback_migrate_page(mapping, newpage, page);
 
-	if (!rc)
+	if (!rc) {
+		mem_cgroup_page_migration(page, newpage);
 		remove_migration_ptes(page, newpage);
-	else
+	} else
 		newpage->mapping = NULL;
 
 	unlock_page(newpage);
@@ -614,6 +615,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	int *result = NULL;
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
+	int charge = 0;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -673,14 +675,19 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto rcu_unlock;
 	}
 
+	charge = mem_cgroup_prepare_migration(page);
 	/* Establish migration ptes or remove ptes */
 	try_to_unmap(page, 1);
 
 	if (!page_mapped(page))
 		rc = move_to_new_page(newpage, page);
 
-	if (rc)
+	if (rc) {
 		remove_migration_ptes(page, page);
+		if (charge)
+			mem_cgroup_end_migration(page);
+	} else if (charge)
+ 		mem_cgroup_end_migration(newpage);
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
-- 
cgit 


From 4fca88c87b7969c698912e2de9b1b31088c777cb Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 7 Feb 2008 00:14:27 -0800
Subject: memory cgroup enhancements: add- pre_destroy() handler

Add a handler "pre_destroy" to cgroup_subsys.  It is called before
cgroup_rmdir() checks all subsys's refcnt.

I think this is useful for subsys which have some extra refs even if there
are no tasks in cgroup.  By adding pre_destroy(), the kernel keeps the rule
"destroy() against subsystem is called only when refcnt=0." and allows css
ref to be used by other objects than tasks.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 87479328d46d..d8e92223a79c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -233,6 +233,7 @@ int cgroup_is_descendant(const struct cgroup *cont);
 struct cgroup_subsys {
 	struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
 						  struct cgroup *cont);
+	void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cont);
 	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cont);
 	int (*can_attach)(struct cgroup_subsys *ss,
 			  struct cgroup *cont, struct task_struct *tsk);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4d67a39c58a8..4e8b16a8266c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -586,6 +586,21 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
 	return inode;
 }
 
+/*
+ * Call subsys's pre_destroy handler.
+ * This is called before css refcnt check.
+ */
+
+static void cgroup_call_pre_destroy(struct cgroup *cgrp)
+{
+	struct cgroup_subsys *ss;
+	for_each_subsys(cgrp->root, ss)
+		if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
+			ss->pre_destroy(ss, cgrp);
+	return;
+}
+
+
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -2160,6 +2175,13 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	parent = cgrp->parent;
 	root = cgrp->root;
 	sb = root->sb;
+	/*
+	 * Call pre_destroy handlers of subsys
+	 */
+	cgroup_call_pre_destroy(cgrp);
+	/*
+	 * Notify subsyses that rmdir() request comes.
+	 */
 
 	if (cgroup_has_css_refs(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
-- 
cgit 


From 58ae83db2a40dea15d4277d499a11dadc823c388 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 7 Feb 2008 00:14:32 -0800
Subject: per-zone and reclaim enhancements for memory controller: calculate
 mapper_ratio per cgroup

Define function for calculating mapped_ratio in memory cgroup.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 11 ++++++++++-
 mm/memcontrol.c            | 17 +++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4ec712967f7c..085cdcd817b0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -64,6 +64,12 @@ extern int mem_cgroup_prepare_migration(struct page *page);
 extern void mem_cgroup_end_migration(struct page *page);
 extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
 
+/*
+ * For memory reclaim.
+ */
+extern int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem);
+
+
 #else /* CONFIG_CGROUP_MEM_CONT */
 static inline void mm_init_cgroup(struct mm_struct *mm,
 					struct task_struct *p)
@@ -135,7 +141,10 @@ mem_cgroup_page_migration(struct page *page, struct page *newpage)
 {
 }
 
-
+static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
+{
+	return 0;
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1637575d3339..2ef214ed5cf8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -420,6 +420,23 @@ void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 	spin_unlock(&mem->lru_lock);
 }
 
+/*
+ * Calculate mapped_ratio under memory controller. This will be used in
+ * vmscan.c for deteremining we have to reclaim mapped pages.
+ */
+int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
+{
+	long total, rss;
+
+	/*
+	 * usage is recorded in bytes. But, here, we assume the number of
+	 * physical pages can be represented by "long" on any arch.
+	 */
+	total = (long) (mem->res.usage >> PAGE_SHIFT) + 1L;
+	rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
+	return (int)((rss * 100L) / total);
+}
+
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-- 
cgit 


From 5932f3671bb2dd873c5ac443cbf5dc2cd167ae94 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 7 Feb 2008 00:14:33 -0800
Subject: per-zone and reclaim enhancements for memory controller: calculate
 active/inactive imbalance per cgroup

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  8 ++++++++
 mm/memcontrol.c            | 14 ++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 085cdcd817b0..bb9c079eeb0c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -68,6 +68,8 @@ extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
  * For memory reclaim.
  */
 extern int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem);
+extern long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem);
+
 
 
 #else /* CONFIG_CGROUP_MEM_CONT */
@@ -145,6 +147,12 @@ static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 {
 	return 0;
 }
+
+static inline int mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
+{
+	return 0;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2ef214ed5cf8..78a928d90267 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -436,6 +436,20 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 	rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
 	return (int)((rss * 100L) / total);
 }
+/*
+ * This function is called from vmscan.c. In page reclaiming loop. balance
+ * between active and inactive list is calculated. For memory controller
+ * page reclaiming, we should use using mem_cgroup's imbalance rather than
+ * zone's global lru imbalance.
+ */
+long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
+{
+	unsigned long active, inactive;
+	/* active and inactive are the number of pages. 'long' is ok.*/
+	active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
+	inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
+	return (long) (active / (inactive + 1));
+}
 
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
-- 
cgit 


From 6c48a1d040a9a9eaa4acdd7d4cb3885e04bf8413 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 7 Feb 2008 00:14:34 -0800
Subject: per-zone and reclaim enhancements for memory controller: remember
 reclaim priority in memory cgroup

Functions to remember reclaim priority per cgroup (as zone->prev_priority)

[akpm@linux-foundation.org: build fixes]
[akpm@linux-foundation.org: more build fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 20 ++++++++++++++++++++
 mm/memcontrol.c            | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bb9c079eeb0c..f82158faa494 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -70,6 +70,11 @@ extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
 extern int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem);
 extern long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem);
 
+extern int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem);
+extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
+							int priority);
+extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
+							int priority);
 
 
 #else /* CONFIG_CGROUP_MEM_CONT */
@@ -153,6 +158,21 @@ static inline int mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
 	return 0;
 }
 
+static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
+{
+	return 0;
+}
+
+static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
+						int priority)
+{
+}
+
+static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
+						int priority)
+{
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 78a928d90267..f8a6a39c440d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -132,6 +132,7 @@ struct mem_cgroup {
 	 */
 	spinlock_t lru_lock;
 	unsigned long control_type;	/* control RSS or RSS+Pagecache */
+	int	prev_priority;	/* for recording reclaim priority */
 	/*
 	 * statistics.
 	 */
@@ -451,6 +452,25 @@ long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
 	return (long) (active / (inactive + 1));
 }
 
+/*
+ * prev_priority control...this will be used in memory reclaim path.
+ */
+int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
+{
+	return mem->prev_priority;
+}
+
+void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority)
+{
+	if (priority < mem->prev_priority)
+		mem->prev_priority = priority;
+}
+
+void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
+{
+	mem->prev_priority = priority;
+}
+
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-- 
cgit 


From cc38108e1ba7f3b9e12b82d0236fa3730c2e0439 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 7 Feb 2008 00:14:35 -0800
Subject: per-zone and reclaim enhancements for memory controller: calculate
 the number of pages to be scanned per cgroup

Define function for calculating the number of scan target on each Zone/LRU.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 15 +++++++++++++++
 mm/memcontrol.c            | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f82158faa494..d87090eb14c0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -76,6 +76,10 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
 
+extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
+				struct zone *zone, int priority);
+extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
+				struct zone *zone, int priority);
 
 #else /* CONFIG_CGROUP_MEM_CONT */
 static inline void mm_init_cgroup(struct mm_struct *mm,
@@ -173,6 +177,17 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 {
 }
 
+static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
+					struct zone *zone, int priority)
+{
+	return 0;
+}
+
+static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
+					struct zone *zone, int priority)
+{
+	return 0;
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f8a6a39c440d..40cdba68de34 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -471,6 +471,39 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
 	mem->prev_priority = priority;
 }
 
+/*
+ * Calculate # of pages to be scanned in this priority/zone.
+ * See also vmscan.c
+ *
+ * priority starts from "DEF_PRIORITY" and decremented in each loop.
+ * (see include/linux/mmzone.h)
+ */
+
+long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
+				   struct zone *zone, int priority)
+{
+	long nr_active;
+	int nid = zone->zone_pgdat->node_id;
+	int zid = zone_idx(zone);
+	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
+
+	nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
+	return (nr_active >> priority);
+}
+
+long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
+					struct zone *zone, int priority)
+{
+	long nr_inactive;
+	int nid = zone->zone_pgdat->node_id;
+	int zid = zone_idx(zone);
+	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
+
+	nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
+
+	return (nr_inactive >> priority);
+}
+
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-- 
cgit 


From 3c541e14bfa553133c3473a6ed3e4c0583ea2285 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Thu, 7 Feb 2008 00:14:41 -0800
Subject: Memory controller remove control_type feature

Based on the discussion at http://lkml.org/lkml/2007/12/20/383, it was felt
that control_type might not be a good thing to implement right away.  We
can add this flexibility at a later point when required.

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 +--
 mm/memcontrol.c            | 91 +++++++++-------------------------------------
 2 files changed, 18 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d87090eb14c0..9815951ec995 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -38,6 +38,7 @@ extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 extern void mem_cgroup_uncharge(struct page_cgroup *pc);
+extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
@@ -55,11 +56,6 @@ static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
 	return rcu_dereference(mm->mem_cgroup);
 }
 
-static inline void mem_cgroup_uncharge_page(struct page *page)
-{
-	mem_cgroup_uncharge(page_get_page_cgroup(page));
-}
-
 extern int mem_cgroup_prepare_migration(struct page *page);
 extern void mem_cgroup_end_migration(struct page *page);
 extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 315dee180129..5c2c702af617 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -131,7 +131,6 @@ struct mem_cgroup {
 	 */
 	struct mem_cgroup_lru_info info;
 
-	unsigned long control_type;	/* control RSS or RSS+Pagecache */
 	int	prev_priority;	/* for recording reclaim priority */
 	/*
 	 * statistics.
@@ -709,24 +708,17 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
 	int ret = 0;
-	struct mem_cgroup *mem;
 	if (!mm)
 		mm = &init_mm;
 
-	rcu_read_lock();
-	mem = rcu_dereference(mm->mem_cgroup);
-	css_get(&mem->css);
-	rcu_read_unlock();
-	if (mem->control_type == MEM_CGROUP_TYPE_ALL)
-		ret = mem_cgroup_charge_common(page, mm, gfp_mask,
+	ret = mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE);
-	css_put(&mem->css);
 	return ret;
 }
 
 /*
  * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge. This routine should be called with lock_page_cgroup held
  */
 void mem_cgroup_uncharge(struct page_cgroup *pc)
 {
@@ -736,8 +728,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 	unsigned long flags;
 
 	/*
-	 * This can handle cases when a page is not charged at all and we
-	 * are switching between handling the control_type.
+	 * Check if our page_cgroup is valid
 	 */
 	if (!pc)
 		return;
@@ -749,6 +740,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 		 * get page->cgroup and clear it under lock.
 		 * force_empty can drop page->cgroup without checking refcnt.
 		 */
+		unlock_page_cgroup(page);
 		if (clear_page_cgroup(page, pc) == pc) {
 			mem = pc->mem_cgroup;
 			css_put(&mem->css);
@@ -758,9 +750,17 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 			spin_unlock_irqrestore(&mz->lru_lock, flags);
 			kfree(pc);
 		}
+		lock_page_cgroup(page);
 	}
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+	lock_page_cgroup(page);
+	mem_cgroup_uncharge(page_get_page_cgroup(page));
+	unlock_page_cgroup(page);
+}
+
 /*
  * Returns non-zero if a page (under migration) has valid page_cgroup member.
  * Refcnt of page_cgroup is incremented.
@@ -780,8 +780,12 @@ int mem_cgroup_prepare_migration(struct page *page)
 
 void mem_cgroup_end_migration(struct page *page)
 {
-	struct page_cgroup *pc = page_get_page_cgroup(page);
+	struct page_cgroup *pc;
+
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
 	mem_cgroup_uncharge(pc);
+	unlock_page_cgroup(page);
 }
 /*
  * We know both *page* and *newpage* are now not-on-LRU and Pg_locked.
@@ -936,61 +940,6 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 				mem_cgroup_write_strategy);
 }
 
-static ssize_t mem_control_type_write(struct cgroup *cont,
-			struct cftype *cft, struct file *file,
-			const char __user *userbuf,
-			size_t nbytes, loff_t *pos)
-{
-	int ret;
-	char *buf, *end;
-	unsigned long tmp;
-	struct mem_cgroup *mem;
-
-	mem = mem_cgroup_from_cont(cont);
-	buf = kmalloc(nbytes + 1, GFP_KERNEL);
-	ret = -ENOMEM;
-	if (buf == NULL)
-		goto out;
-
-	buf[nbytes] = 0;
-	ret = -EFAULT;
-	if (copy_from_user(buf, userbuf, nbytes))
-		goto out_free;
-
-	ret = -EINVAL;
-	tmp = simple_strtoul(buf, &end, 10);
-	if (*end != '\0')
-		goto out_free;
-
-	if (tmp <= MEM_CGROUP_TYPE_UNSPEC || tmp >= MEM_CGROUP_TYPE_MAX)
-		goto out_free;
-
-	mem->control_type = tmp;
-	ret = nbytes;
-out_free:
-	kfree(buf);
-out:
-	return ret;
-}
-
-static ssize_t mem_control_type_read(struct cgroup *cont,
-				struct cftype *cft,
-				struct file *file, char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
-{
-	unsigned long val;
-	char buf[64], *s;
-	struct mem_cgroup *mem;
-
-	mem = mem_cgroup_from_cont(cont);
-	s = buf;
-	val = mem->control_type;
-	s += sprintf(s, "%lu\n", val);
-	return simple_read_from_buffer((void __user *)userbuf, nbytes,
-			ppos, buf, s - buf);
-}
-
-
 static ssize_t mem_force_empty_write(struct cgroup *cont,
 				struct cftype *cft, struct file *file,
 				const char __user *userbuf,
@@ -1088,11 +1037,6 @@ static struct cftype mem_cgroup_files[] = {
 		.private = RES_FAILCNT,
 		.read = mem_cgroup_read,
 	},
-	{
-		.name = "control_type",
-		.write = mem_control_type_write,
-		.read = mem_control_type_read,
-	},
 	{
 		.name = "force_empty",
 		.write = mem_force_empty_write,
@@ -1161,7 +1105,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 
 	res_counter_init(&mem->res);
 
-	mem->control_type = MEM_CGROUP_TYPE_ALL;
 	memset(&mem->info, 0, sizeof(mem->info));
 
 	for_each_node_state(node, N_POSSIBLE)
-- 
cgit 


From 31a7df01fd0cd786f60873a921aecafac148c290 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Thu, 7 Feb 2008 00:14:42 -0800
Subject: cgroups: mechanism to process each task in a cgroup

Provide cgroup_scan_tasks(), which iterates through every task in a cgroup,
calling a test function and a process function for each.  And call the process
function without holding the css_set_lock lock.

The idea is David Rientjes', predicting that such a function will make it much
easier in the future to extend things that require access to each task in a
cgroup without holding the lock,

[akpm@linux-foundation.org: cleanup]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |  14 ++++
 kernel/cgroup.c        | 198 ++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 200 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d8e92223a79c..8675c691d3e2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -14,6 +14,7 @@
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
 #include <linux/cgroupstats.h>
+#include <linux/prio_heap.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -207,6 +208,14 @@ struct cftype {
 	int (*release) (struct inode *inode, struct file *file);
 };
 
+struct cgroup_scanner {
+	struct cgroup *cg;
+	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
+	void (*process_task)(struct task_struct *p,
+			struct cgroup_scanner *scan);
+	struct ptr_heap *heap;
+};
+
 /* Add a new file to the given cgroup directory. Should only be
  * called by subsystems from within a populate() method */
 int cgroup_add_file(struct cgroup *cont, struct cgroup_subsys *subsys,
@@ -299,11 +308,16 @@ struct cgroup_iter {
  *    returns NULL or until you want to end the iteration
  *
  * 3) call cgroup_iter_end() to destroy the iterator.
+ *
+ * Or, call cgroup_scan_tasks() to iterate through every task in a cpuset.
+ *    - cgroup_scan_tasks() holds the css_set_lock when calling the test_task()
+ *      callback, but not while calling the process_task() callback.
  */
 void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it);
 struct task_struct *cgroup_iter_next(struct cgroup *cont,
 					struct cgroup_iter *it);
 void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
+int cgroup_scan_tasks(struct cgroup_scanner *scan);
 
 #else /* !CONFIG_CGROUPS */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4e8b16a8266c..bcc7a6e8e3c0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1695,6 +1695,29 @@ static void cgroup_advance_iter(struct cgroup *cgrp,
 	it->task = cg->tasks.next;
 }
 
+/*
+ * To reduce the fork() overhead for systems that are not actually
+ * using their cgroups capability, we don't maintain the lists running
+ * through each css_set to its tasks until we see the list actually
+ * used - in other words after the first call to cgroup_iter_start().
+ *
+ * The tasklist_lock is not held here, as do_each_thread() and
+ * while_each_thread() are protected by RCU.
+ */
+void cgroup_enable_task_cg_lists(void)
+{
+	struct task_struct *p, *g;
+	write_lock(&css_set_lock);
+	use_task_css_set_links = 1;
+	do_each_thread(g, p) {
+		task_lock(p);
+		if (list_empty(&p->cg_list))
+			list_add(&p->cg_list, &p->cgroups->tasks);
+		task_unlock(p);
+	} while_each_thread(g, p);
+	write_unlock(&css_set_lock);
+}
+
 void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
 {
 	/*
@@ -1702,18 +1725,9 @@ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
 	 * we need to enable the list linking each css_set to its
 	 * tasks, and fix up all existing tasks.
 	 */
-	if (!use_task_css_set_links) {
-		struct task_struct *p, *g;
-		write_lock(&css_set_lock);
-		use_task_css_set_links = 1;
- 		do_each_thread(g, p) {
-			task_lock(p);
-			if (list_empty(&p->cg_list))
-				list_add(&p->cg_list, &p->cgroups->tasks);
-			task_unlock(p);
- 		} while_each_thread(g, p);
-		write_unlock(&css_set_lock);
-	}
+	if (!use_task_css_set_links)
+		cgroup_enable_task_cg_lists();
+
 	read_lock(&css_set_lock);
 	it->cg_link = &cgrp->css_sets;
 	cgroup_advance_iter(cgrp, it);
@@ -1746,6 +1760,166 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
 	read_unlock(&css_set_lock);
 }
 
+static inline int started_after_time(struct task_struct *t1,
+				     struct timespec *time,
+				     struct task_struct *t2)
+{
+	int start_diff = timespec_compare(&t1->start_time, time);
+	if (start_diff > 0) {
+		return 1;
+	} else if (start_diff < 0) {
+		return 0;
+	} else {
+		/*
+		 * Arbitrarily, if two processes started at the same
+		 * time, we'll say that the lower pointer value
+		 * started first. Note that t2 may have exited by now
+		 * so this may not be a valid pointer any longer, but
+		 * that's fine - it still serves to distinguish
+		 * between two tasks started (effectively) simultaneously.
+		 */
+		return t1 > t2;
+	}
+}
+
+/*
+ * This function is a callback from heap_insert() and is used to order
+ * the heap.
+ * In this case we order the heap in descending task start time.
+ */
+static inline int started_after(void *p1, void *p2)
+{
+	struct task_struct *t1 = p1;
+	struct task_struct *t2 = p2;
+	return started_after_time(t1, &t2->start_time, t2);
+}
+
+/**
+ * cgroup_scan_tasks - iterate though all the tasks in a cgroup
+ * @scan: struct cgroup_scanner containing arguments for the scan
+ *
+ * Arguments include pointers to callback functions test_task() and
+ * process_task().
+ * Iterate through all the tasks in a cgroup, calling test_task() for each,
+ * and if it returns true, call process_task() for it also.
+ * The test_task pointer may be NULL, meaning always true (select all tasks).
+ * Effectively duplicates cgroup_iter_{start,next,end}()
+ * but does not lock css_set_lock for the call to process_task().
+ * The struct cgroup_scanner may be embedded in any structure of the caller's
+ * creation.
+ * It is guaranteed that process_task() will act on every task that
+ * is a member of the cgroup for the duration of this call. This
+ * function may or may not call process_task() for tasks that exit
+ * or move to a different cgroup during the call, or are forked or
+ * move into the cgroup during the call.
+ *
+ * Note that test_task() may be called with locks held, and may in some
+ * situations be called multiple times for the same task, so it should
+ * be cheap.
+ * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been
+ * pre-allocated and will be used for heap operations (and its "gt" member will
+ * be overwritten), else a temporary heap will be used (allocation of which
+ * may cause this function to fail).
+ */
+int cgroup_scan_tasks(struct cgroup_scanner *scan)
+{
+	int retval, i;
+	struct cgroup_iter it;
+	struct task_struct *p, *dropped;
+	/* Never dereference latest_task, since it's not refcounted */
+	struct task_struct *latest_task = NULL;
+	struct ptr_heap tmp_heap;
+	struct ptr_heap *heap;
+	struct timespec latest_time = { 0, 0 };
+
+	if (scan->heap) {
+		/* The caller supplied our heap and pre-allocated its memory */
+		heap = scan->heap;
+		heap->gt = &started_after;
+	} else {
+		/* We need to allocate our own heap memory */
+		heap = &tmp_heap;
+		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
+		if (retval)
+			/* cannot allocate the heap */
+			return retval;
+	}
+
+ again:
+	/*
+	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
+	 * to determine which are of interest, and using the scanner's
+	 * "process_task" callback to process any of them that need an update.
+	 * Since we don't want to hold any locks during the task updates,
+	 * gather tasks to be processed in a heap structure.
+	 * The heap is sorted by descending task start time.
+	 * If the statically-sized heap fills up, we overflow tasks that
+	 * started later, and in future iterations only consider tasks that
+	 * started after the latest task in the previous pass. This
+	 * guarantees forward progress and that we don't miss any tasks.
+	 */
+	heap->size = 0;
+	cgroup_iter_start(scan->cg, &it);
+	while ((p = cgroup_iter_next(scan->cg, &it))) {
+		/*
+		 * Only affect tasks that qualify per the caller's callback,
+		 * if he provided one
+		 */
+		if (scan->test_task && !scan->test_task(p, scan))
+			continue;
+		/*
+		 * Only process tasks that started after the last task
+		 * we processed
+		 */
+		if (!started_after_time(p, &latest_time, latest_task))
+			continue;
+		dropped = heap_insert(heap, p);
+		if (dropped == NULL) {
+			/*
+			 * The new task was inserted; the heap wasn't
+			 * previously full
+			 */
+			get_task_struct(p);
+		} else if (dropped != p) {
+			/*
+			 * The new task was inserted, and pushed out a
+			 * different task
+			 */
+			get_task_struct(p);
+			put_task_struct(dropped);
+		}
+		/*
+		 * Else the new task was newer than anything already in
+		 * the heap and wasn't inserted
+		 */
+	}
+	cgroup_iter_end(scan->cg, &it);
+
+	if (heap->size) {
+		for (i = 0; i < heap->size; i++) {
+			struct task_struct *p = heap->ptrs[i];
+			if (i == 0) {
+				latest_time = p->start_time;
+				latest_task = p;
+			}
+			/* Process the task per the caller's callback */
+			scan->process_task(p, scan);
+			put_task_struct(p);
+		}
+		/*
+		 * If we had to process any tasks at all, scan again
+		 * in case some of them were in the middle of forking
+		 * children that didn't get processed.
+		 * Not the most efficient way to do it, but it avoids
+		 * having to take callback_mutex in the fork path
+		 */
+		goto again;
+	}
+	if (heap == &tmp_heap)
+		heap_free(&tmp_heap);
+	return 0;
+}
+
 /*
  * Stuff for reading the 'tasks' file.
  *
-- 
cgit 


From 956db3ca0606e78456786ef19fd4dc7a5151a6e1 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Thu, 7 Feb 2008 00:14:43 -0800
Subject: hotplug cpu: move tasks in empty cpusets to parent

This patch corrects a situation that occurs when one disables all the cpus in
a cpuset.

Currently, the disabled (cpu-less) cpuset inherits the cpus of its parent,
which is incorrect because it may then overlap its cpu-exclusive sibling.

Tasks of an empty cpuset should be moved to the cpuset which is the parent of
their current cpuset.  Or if the parent cpuset has no cpus, to its parent,
etc.

And the empty cpuset should be released (if it is flagged notify_on_release).

Depends on the cgroup_scan_tasks() function (proposed by David Rientjes) to
iterate through all tasks in the cpu-less cpuset.  We are deliberately
avoiding a walk of the tasklist.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |   1 +
 kernel/cgroup.c        |  22 +++----
 kernel/cpuset.c        | 167 +++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 145 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8675c691d3e2..ff9055fc3d2a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -318,6 +318,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cont,
 					struct cgroup_iter *it);
 void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
+int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
 #else /* !CONFIG_CGROUPS */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bcc7a6e8e3c0..2c5cccbe12e2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -489,7 +489,7 @@ static struct css_set *find_css_set(
  * Any task can increment and decrement the count field without lock.
  * So in general, code holding cgroup_mutex can't rely on the count
  * field not changing.  However, if the count goes to zero, then only
- * attach_task() can increment it again.  Because a count of zero
+ * cgroup_attach_task() can increment it again.  Because a count of zero
  * means that no tasks are currently attached, therefore there is no
  * way a task attached to that cgroup can fork (the other way to
  * increment the count).  So code holding cgroup_mutex can safely
@@ -520,17 +520,17 @@ static struct css_set *find_css_set(
  *	The task_lock() exception
  *
  * The need for this exception arises from the action of
- * attach_task(), which overwrites one tasks cgroup pointer with
+ * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
  * another.  It does so using cgroup_mutexe, however there are
  * several performance critical places that need to reference
  * task->cgroup without the expense of grabbing a system global
  * mutex.  Therefore except as noted below, when dereferencing or, as
- * in attach_task(), modifying a task'ss cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
  *
  * P.S.  One more locking exception.  RCU is used to guard the
- * update of a tasks cgroup pointer by attach_task()
+ * update of a tasks cgroup pointer by cgroup_attach_task()
  */
 
 /**
@@ -1194,7 +1194,7 @@ static void get_first_subsys(const struct cgroup *cgrp,
  * Call holding cgroup_mutex.  May take task_lock of
  * the task 'pid' during call.
  */
-static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 	int retval = 0;
 	struct cgroup_subsys *ss;
@@ -1287,7 +1287,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
 		get_task_struct(tsk);
 	}
 
-	ret = attach_task(cgrp, tsk);
+	ret = cgroup_attach_task(cgrp, tsk);
 	put_task_struct(tsk);
 	return ret;
 }
@@ -2514,7 +2514,7 @@ out:
  *  - Used for /proc/<pid>/cgroup.
  *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
  *    doesn't really matter if tsk->cgroup changes after we read it,
- *    and we take cgroup_mutex, keeping attach_task() from changing it
+ *    and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
  *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
  *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
  *    cgroup to top_cgroup.
@@ -2625,7 +2625,7 @@ static struct file_operations proc_cgroupstats_operations = {
  * A pointer to the shared css_set was automatically copied in
  * fork.c by dup_task_struct().  However, we ignore that copy, since
  * it was not made under the protection of RCU or cgroup_mutex, so
- * might no longer be a valid cgroup pointer.  attach_task() might
+ * might no longer be a valid cgroup pointer.  cgroup_attach_task() might
  * have already changed current->cgroups, allowing the previously
  * referenced cgroup group to be removed and freed.
  *
@@ -2704,8 +2704,8 @@ void cgroup_post_fork(struct task_struct *child)
  *    attach us to a different cgroup, decrementing the count on
  *    the first cgroup that we never incremented.  But in this case,
  *    top_cgroup isn't going away, and either task has PF_EXITING set,
- *    which wards off any attach_task() attempts, or task is a failed
- *    fork, never visible to attach_task.
+ *    which wards off any cgroup_attach_task() attempts, or task is a failed
+ *    fork, never visible to cgroup_attach_task.
  *
  */
 void cgroup_exit(struct task_struct *tsk, int run_callbacks)
@@ -2845,7 +2845,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 	}
 
 	/* All seems fine. Finish by moving the task into the new cgroup */
-	ret = attach_task(child, tsk);
+	ret = cgroup_attach_task(child, tsk);
 	mutex_unlock(&cgroup_mutex);
 
  out_release:
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index cfaf6419d817..d94a8f7c4c29 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -56,6 +56,8 @@
 #include <asm/atomic.h>
 #include <linux/mutex.h>
 #include <linux/kfifo.h>
+#include <linux/workqueue.h>
+#include <linux/cgroup.h>
 
 /*
  * Tracks how many cpusets are currently defined in system.
@@ -96,6 +98,9 @@ struct cpuset {
 
 	/* partition number for rebuild_sched_domains() */
 	int pn;
+
+	/* used for walking a cpuset heirarchy */
+	struct list_head stack_list;
 };
 
 /* Retrieve the cpuset for a cgroup */
@@ -111,7 +116,10 @@ static inline struct cpuset *task_cs(struct task_struct *task)
 	return container_of(task_subsys_state(task, cpuset_subsys_id),
 			    struct cpuset, css);
 }
-
+struct cpuset_hotplug_scanner {
+	struct cgroup_scanner scan;
+	struct cgroup *to;
+};
 
 /* bits in struct cpuset flags field */
 typedef enum {
@@ -1687,53 +1695,146 @@ int __init cpuset_init(void)
 	return 0;
 }
 
+/**
+ * cpuset_do_move_task - move a given task to another cpuset
+ * @tsk: pointer to task_struct the task to move
+ * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
+ *
+ * Called by cgroup_scan_tasks() for each task in a cgroup.
+ * Return nonzero to stop the walk through the tasks.
+ */
+void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan)
+{
+	struct cpuset_hotplug_scanner *chsp;
+
+	chsp = container_of(scan, struct cpuset_hotplug_scanner, scan);
+	cgroup_attach_task(chsp->to, tsk);
+}
+
+/**
+ * move_member_tasks_to_cpuset - move tasks from one cpuset to another
+ * @from: cpuset in which the tasks currently reside
+ * @to: cpuset to which the tasks will be moved
+ *
+ * Called with manage_sem held
+ * callback_mutex must not be held, as attach_task() will take it.
+ *
+ * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * calling callback functions for each.
+ */
+static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
+{
+	struct cpuset_hotplug_scanner scan;
+
+	scan.scan.cg = from->css.cgroup;
+	scan.scan.test_task = NULL; /* select all tasks in cgroup */
+	scan.scan.process_task = cpuset_do_move_task;
+	scan.scan.heap = NULL;
+	scan.to = to->css.cgroup;
+
+	if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+		printk(KERN_ERR "move_member_tasks_to_cpuset: "
+				"cgroup_scan_tasks failed\n");
+}
+
 /*
  * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
  * or memory nodes, we need to walk over the cpuset hierarchy,
  * removing that CPU or node from all cpusets.  If this removes the
- * last CPU or node from a cpuset, then the guarantee_online_cpus()
- * or guarantee_online_mems() code will use that emptied cpusets
- * parent online CPUs or nodes.  Cpusets that were already empty of
- * CPUs or nodes are left empty.
- *
- * This routine is intentionally inefficient in a couple of regards.
- * It will check all cpusets in a subtree even if the top cpuset of
- * the subtree has no offline CPUs or nodes.  It checks both CPUs and
- * nodes, even though the caller could have been coded to know that
- * only one of CPUs or nodes needed to be checked on a given call.
- * This was done to minimize text size rather than cpu cycles.
+ * last CPU or node from a cpuset, then move the tasks in the empty
+ * cpuset to its next-highest non-empty parent.
  *
- * Call with both manage_mutex and callback_mutex held.
+ * The parent cpuset has some superset of the 'mems' nodes that the
+ * newly empty cpuset held, so no migration of memory is necessary.
  *
- * Recursive, on depth of cpuset subtree.
+ * Called with both manage_sem and callback_sem held
  */
+static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
+{
+	struct cpuset *parent;
+
+	/* the cgroup's css_sets list is in use if there are tasks
+	   in the cpuset; the list is empty if there are none;
+	   the cs->css.refcnt seems always 0 */
+	if (list_empty(&cs->css.cgroup->css_sets))
+		return;
 
-static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
+	/*
+	 * Find its next-highest non-empty parent, (top cpuset
+	 * has online cpus, so can't be empty).
+	 */
+	parent = cs->parent;
+	while (cpus_empty(parent->cpus_allowed)) {
+		/*
+		 * this empty cpuset should now be considered to
+		 * have been used, and therefore eligible for
+		 * release when empty (if it is notify_on_release)
+		 */
+		parent = parent->parent;
+	}
+
+	move_member_tasks_to_cpuset(cs, parent);
+}
+
+/*
+ * Walk the specified cpuset subtree and look for empty cpusets.
+ * The tasks of such cpuset must be moved to a parent cpuset.
+ *
+ * Note that such a notify_on_release cpuset must have had, at some time,
+ * member tasks or cpuset descendants and cpus and memory, before it can
+ * be a candidate for release.
+ *
+ * Called with manage_mutex held.  We take callback_mutex to modify
+ * cpus_allowed and mems_allowed.
+ *
+ * This walk processes the tree from top to bottom, completing one layer
+ * before dropping down to the next.  It always processes a node before
+ * any of its children.
+ *
+ * For now, since we lack memory hot unplug, we'll never see a cpuset
+ * that has tasks along with an empty 'mems'.  But if we did see such
+ * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
+ */
+static void scan_for_empty_cpusets(const struct cpuset *root)
 {
+	struct cpuset *cp;	/* scans cpusets being updated */
+	struct cpuset *child;	/* scans child cpusets of cp */
+	struct list_head queue;
 	struct cgroup *cont;
-	struct cpuset *c;
 
-	/* Each of our child cpusets mems must be online */
-	list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
-		c = cgroup_cs(cont);
-		guarantee_online_cpus_mems_in_subtree(c);
-		if (!cpus_empty(c->cpus_allowed))
-			guarantee_online_cpus(c, &c->cpus_allowed);
-		if (!nodes_empty(c->mems_allowed))
-			guarantee_online_mems(c, &c->mems_allowed);
+	INIT_LIST_HEAD(&queue);
+
+	list_add_tail((struct list_head *)&root->stack_list, &queue);
+
+	mutex_lock(&callback_mutex);
+	while (!list_empty(&queue)) {
+		cp = container_of(queue.next, struct cpuset, stack_list);
+		list_del(queue.next);
+		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+			child = cgroup_cs(cont);
+			list_add_tail(&child->stack_list, &queue);
+		}
+		cont = cp->css.cgroup;
+		/* Remove offline cpus and mems from this cpuset. */
+		cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
+		nodes_and(cp->mems_allowed, cp->mems_allowed,
+						node_states[N_HIGH_MEMORY]);
+		if ((cpus_empty(cp->cpus_allowed) ||
+		     nodes_empty(cp->mems_allowed))) {
+			/* Move tasks from the empty cpuset to a parent */
+			mutex_unlock(&callback_mutex);
+			remove_tasks_in_empty_cpuset(cp);
+			mutex_lock(&callback_mutex);
+		}
 	}
+	mutex_unlock(&callback_mutex);
+	return;
 }
 
 /*
  * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
  * cpu_online_map and node_states[N_HIGH_MEMORY].  Force the top cpuset to
- * track what's online after any CPU or memory node hotplug or unplug
- * event.
- *
- * To ensure that we don't remove a CPU or node from the top cpuset
- * that is currently in use by a child cpuset (which would violate
- * the rule that cpusets must be subsets of their parent), we first
- * call the recursive routine guarantee_online_cpus_mems_in_subtree().
+ * track what's online after any CPU or memory node hotplug or unplug event.
  *
  * Since there are two callers of this routine, one for CPU hotplug
  * events and one for memory node hotplug events, we could have coded
@@ -1744,13 +1845,11 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 static void common_cpu_mem_hotplug_unplug(void)
 {
 	cgroup_lock();
-	mutex_lock(&callback_mutex);
 
-	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
 	top_cpuset.cpus_allowed = cpu_online_map;
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+	scan_for_empty_cpusets(&top_cpuset);
 
-	mutex_unlock(&callback_mutex);
 	cgroup_unlock();
 }
 
-- 
cgit 


From fa9ff4b185b8f7f124c1c6686f02e690f0625287 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@openedhand.com>
Date: Thu, 7 Feb 2008 00:14:49 -0800
Subject: ASIC3 driver

This is a patch for the Compaq ASIC3 multi function chip, found in many
PDAs (iPAQs, HTCs...).

It is a simplified version of Paul Sokolovsky's first proposal [1].  With
this code, it is basically a GPIO and IRQ expander.  My plan is to add more
features once this patch gets reviewed and accepted.

[1] http://lkml.org/lkml/2007/5/1/46

Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
Cc: Paul Sokolovsky <pmiscml@gmail.com>
Cc: Ben Dooks <ben@trinity.fluff.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/Kconfig       |   7 +
 drivers/mfd/Makefile      |   1 +
 drivers/mfd/asic3.c       | 588 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/asic3.h | 497 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 1093 insertions(+)
 create mode 100644 drivers/mfd/asic3.c
 create mode 100644 include/linux/mfd/asic3.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 25716193a534..0c886c882385 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -15,6 +15,13 @@ config MFD_SM501
 	  interface. The device may be connected by PCI or local bus with
 	  varying functions enabled.
 
+config MFD_ASIC3
+	bool "Support for Compaq ASIC3"
+	depends on GENERIC_HARDIRQS && ARM
+	 ---help---
+	  This driver supports the ASIC3 multifunction chip found on many
+	  PDAs (mainly iPAQ and HTC based ones)
+
 endmenu
 
 menu "Multimedia Capabilities Port drivers"
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 51432091b323..521cd5cb68af 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
+obj-$(CONFIG_MFD_ASIC3)		+= asic3.o
 
 obj-$(CONFIG_MCP)		+= mcp-core.o
 obj-$(CONFIG_MCP_SA11X0)	+= mcp-sa11x0.o
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
new file mode 100644
index 000000000000..63fb1ff3ad10
--- /dev/null
+++ b/drivers/mfd/asic3.c
@@ -0,0 +1,588 @@
+/*
+ * driver/mfd/asic3.c
+ *
+ * Compaq ASIC3 support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2001 Compaq Computer Corporation.
+ * Copyright 2004-2005 Phil Blundell
+ * Copyright 2007 OpenedHand Ltd.
+ *
+ * Authors: Phil Blundell <pb@handhelds.org>,
+ *	    Samuel Ortiz <sameo@openedhand.com>
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+
+#include <linux/mfd/asic3.h>
+
+static inline void asic3_write_register(struct asic3 *asic,
+				 unsigned int reg, u32 value)
+{
+	iowrite16(value, (unsigned long)asic->mapping +
+		  (reg >> asic->bus_shift));
+}
+
+static inline u32 asic3_read_register(struct asic3 *asic,
+			       unsigned int reg)
+{
+	return ioread16((unsigned long)asic->mapping +
+			(reg >> asic->bus_shift));
+}
+
+/* IRQs */
+#define MAX_ASIC_ISR_LOOPS    20
+#define ASIC3_GPIO_Base_INCR \
+	(ASIC3_GPIO_B_Base - ASIC3_GPIO_A_Base)
+
+static void asic3_irq_flip_edge(struct asic3 *asic,
+				u32 base, int bit)
+{
+	u16 edge;
+	unsigned long flags;
+
+	spin_lock_irqsave(&asic->lock, flags);
+	edge = asic3_read_register(asic,
+				   base + ASIC3_GPIO_EdgeTrigger);
+	edge ^= bit;
+	asic3_write_register(asic,
+			     base + ASIC3_GPIO_EdgeTrigger, edge);
+	spin_unlock_irqrestore(&asic->lock, flags);
+}
+
+static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	int iter, i;
+	unsigned long flags;
+	struct asic3 *asic;
+
+	desc->chip->ack(irq);
+
+	asic = desc->handler_data;
+
+	for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) {
+		u32 status;
+		int bank;
+
+		spin_lock_irqsave(&asic->lock, flags);
+		status = asic3_read_register(asic,
+					     ASIC3_OFFSET(INTR, PIntStat));
+		spin_unlock_irqrestore(&asic->lock, flags);
+
+		/* Check all ten register bits */
+		if ((status & 0x3ff) == 0)
+			break;
+
+		/* Handle GPIO IRQs */
+		for (bank = 0; bank < ASIC3_NUM_GPIO_BANKS; bank++) {
+			if (status & (1 << bank)) {
+				unsigned long base, istat;
+
+				base = ASIC3_GPIO_A_Base
+				       + bank * ASIC3_GPIO_Base_INCR;
+
+				spin_lock_irqsave(&asic->lock, flags);
+				istat = asic3_read_register(asic,
+							    base +
+							    ASIC3_GPIO_IntStatus);
+				/* Clearing IntStatus */
+				asic3_write_register(asic,
+						     base +
+						     ASIC3_GPIO_IntStatus, 0);
+				spin_unlock_irqrestore(&asic->lock, flags);
+
+				for (i = 0; i < ASIC3_GPIOS_PER_BANK; i++) {
+					int bit = (1 << i);
+					unsigned int irqnr;
+
+					if (!(istat & bit))
+						continue;
+
+					irqnr = asic->irq_base +
+						(ASIC3_GPIOS_PER_BANK * bank)
+						+ i;
+					desc = irq_desc + irqnr;
+					desc->handle_irq(irqnr, desc);
+					if (asic->irq_bothedge[bank] & bit)
+						asic3_irq_flip_edge(asic, base,
+								    bit);
+				}
+			}
+		}
+
+		/* Handle remaining IRQs in the status register */
+		for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
+			/* They start at bit 4 and go up */
+			if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) {
+				desc = irq_desc +  + i;
+				desc->handle_irq(asic->irq_base + i,
+						 desc);
+			}
+		}
+	}
+
+	if (iter >= MAX_ASIC_ISR_LOOPS)
+		printk(KERN_ERR "%s: interrupt processing overrun\n",
+		       __FUNCTION__);
+}
+
+static inline int asic3_irq_to_bank(struct asic3 *asic, int irq)
+{
+	int n;
+
+	n = (irq - asic->irq_base) >> 4;
+
+	return (n * (ASIC3_GPIO_B_Base - ASIC3_GPIO_A_Base));
+}
+
+static inline int asic3_irq_to_index(struct asic3 *asic, int irq)
+{
+	return (irq - asic->irq_base) & 0xf;
+}
+
+static void asic3_mask_gpio_irq(unsigned int irq)
+{
+	struct asic3 *asic = get_irq_chip_data(irq);
+	u32 val, bank, index;
+	unsigned long flags;
+
+	bank = asic3_irq_to_bank(asic, irq);
+	index = asic3_irq_to_index(asic, irq);
+
+	spin_lock_irqsave(&asic->lock, flags);
+	val = asic3_read_register(asic, bank + ASIC3_GPIO_Mask);
+	val |= 1 << index;
+	asic3_write_register(asic, bank + ASIC3_GPIO_Mask, val);
+	spin_unlock_irqrestore(&asic->lock, flags);
+}
+
+static void asic3_mask_irq(unsigned int irq)
+{
+	struct asic3 *asic = get_irq_chip_data(irq);
+	int regval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&asic->lock, flags);
+	regval = asic3_read_register(asic,
+				     ASIC3_INTR_Base +
+				     ASIC3_INTR_IntMask);
+
+	regval &= ~(ASIC3_INTMASK_MASK0 <<
+		    (irq - (asic->irq_base + ASIC3_NUM_GPIOS)));
+
+	asic3_write_register(asic,
+			     ASIC3_INTR_Base +
+			     ASIC3_INTR_IntMask,
+			     regval);
+	spin_unlock_irqrestore(&asic->lock, flags);
+}
+
+static void asic3_unmask_gpio_irq(unsigned int irq)
+{
+	struct asic3 *asic = get_irq_chip_data(irq);
+	u32 val, bank, index;
+	unsigned long flags;
+
+	bank = asic3_irq_to_bank(asic, irq);
+	index = asic3_irq_to_index(asic, irq);
+
+	spin_lock_irqsave(&asic->lock, flags);
+	val = asic3_read_register(asic, bank + ASIC3_GPIO_Mask);
+	val &= ~(1 << index);
+	asic3_write_register(asic, bank + ASIC3_GPIO_Mask, val);
+	spin_unlock_irqrestore(&asic->lock, flags);
+}
+
+static void asic3_unmask_irq(unsigned int irq)
+{
+	struct asic3 *asic = get_irq_chip_data(irq);
+	int regval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&asic->lock, flags);
+	regval = asic3_read_register(asic,
+				     ASIC3_INTR_Base +
+				     ASIC3_INTR_IntMask);
+
+	regval |= (ASIC3_INTMASK_MASK0 <<
+		   (irq - (asic->irq_base + ASIC3_NUM_GPIOS)));
+
+	asic3_write_register(asic,
+			     ASIC3_INTR_Base +
+			     ASIC3_INTR_IntMask,
+			     regval);
+	spin_unlock_irqrestore(&asic->lock, flags);
+}
+
+static int asic3_gpio_irq_type(unsigned int irq, unsigned int type)
+{
+	struct asic3 *asic = get_irq_chip_data(irq);
+	u32 bank, index;
+	u16 trigger, level, edge, bit;
+	unsigned long flags;
+
+	bank = asic3_irq_to_bank(asic, irq);
+	index = asic3_irq_to_index(asic, irq);
+	bit = 1<<index;
+
+	spin_lock_irqsave(&asic->lock, flags);
+	level = asic3_read_register(asic,
+				    bank + ASIC3_GPIO_LevelTrigger);
+	edge = asic3_read_register(asic,
+				   bank + ASIC3_GPIO_EdgeTrigger);
+	trigger = asic3_read_register(asic,
+				      bank + ASIC3_GPIO_TriggerType);
+	asic->irq_bothedge[(irq - asic->irq_base) >> 4] &= ~bit;
+
+	if (type == IRQT_RISING) {
+		trigger |= bit;
+		edge |= bit;
+	} else if (type == IRQT_FALLING) {
+		trigger |= bit;
+		edge &= ~bit;
+	} else if (type == IRQT_BOTHEDGE) {
+		trigger |= bit;
+		if (asic3_gpio_get_value(asic, irq - asic->irq_base))
+			edge &= ~bit;
+		else
+			edge |= bit;
+		asic->irq_bothedge[(irq - asic->irq_base) >> 4] |= bit;
+	} else if (type == IRQT_LOW) {
+		trigger &= ~bit;
+		level &= ~bit;
+	} else if (type == IRQT_HIGH) {
+		trigger &= ~bit;
+		level |= bit;
+	} else {
+		/*
+		 * if type == IRQT_NOEDGE, we should mask interrupts, but
+		 * be careful to not unmask them if mask was also called.
+		 * Probably need internal state for mask.
+		 */
+		printk(KERN_NOTICE "asic3: irq type not changed.\n");
+	}
+	asic3_write_register(asic, bank + ASIC3_GPIO_LevelTrigger,
+			     level);
+	asic3_write_register(asic, bank + ASIC3_GPIO_EdgeTrigger,
+			     edge);
+	asic3_write_register(asic, bank + ASIC3_GPIO_TriggerType,
+			     trigger);
+	spin_unlock_irqrestore(&asic->lock, flags);
+	return 0;
+}
+
+static struct irq_chip asic3_gpio_irq_chip = {
+	.name		= "ASIC3-GPIO",
+	.ack		= asic3_mask_gpio_irq,
+	.mask		= asic3_mask_gpio_irq,
+	.unmask		= asic3_unmask_gpio_irq,
+	.set_type	= asic3_gpio_irq_type,
+};
+
+static struct irq_chip asic3_irq_chip = {
+	.name		= "ASIC3",
+	.ack		= asic3_mask_irq,
+	.mask		= asic3_mask_irq,
+	.unmask		= asic3_unmask_irq,
+};
+
+static int asic3_irq_probe(struct platform_device *pdev)
+{
+	struct asic3 *asic = platform_get_drvdata(pdev);
+	unsigned long clksel = 0;
+	unsigned int irq, irq_base;
+
+	asic->irq_nr = platform_get_irq(pdev, 0);
+	if (asic->irq_nr < 0)
+		return asic->irq_nr;
+
+	/* turn on clock to IRQ controller */
+	clksel |= CLOCK_SEL_CX;
+	asic3_write_register(asic, ASIC3_OFFSET(CLOCK, SEL),
+			     clksel);
+
+	irq_base = asic->irq_base;
+
+	for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) {
+		if (irq < asic->irq_base + ASIC3_NUM_GPIOS)
+			set_irq_chip(irq, &asic3_gpio_irq_chip);
+		else
+			set_irq_chip(irq, &asic3_irq_chip);
+
+		set_irq_chip_data(irq, asic);
+		set_irq_handler(irq, handle_level_irq);
+		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+	}
+
+	asic3_write_register(asic, ASIC3_OFFSET(INTR, IntMask),
+			     ASIC3_INTMASK_GINTMASK);
+
+	set_irq_chained_handler(asic->irq_nr, asic3_irq_demux);
+	set_irq_type(asic->irq_nr, IRQT_RISING);
+	set_irq_data(asic->irq_nr, asic);
+
+	return 0;
+}
+
+static void asic3_irq_remove(struct platform_device *pdev)
+{
+	struct asic3 *asic = platform_get_drvdata(pdev);
+	unsigned int irq, irq_base;
+
+	irq_base = asic->irq_base;
+
+	for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) {
+		set_irq_flags(irq, 0);
+		set_irq_handler(irq, NULL);
+		set_irq_chip(irq, NULL);
+		set_irq_chip_data(irq, NULL);
+	}
+	set_irq_chained_handler(asic->irq_nr, NULL);
+}
+
+/* GPIOs */
+static inline u32 asic3_get_gpio(struct asic3 *asic, unsigned int base,
+				 unsigned int function)
+{
+	return asic3_read_register(asic, base + function);
+}
+
+static void asic3_set_gpio(struct asic3 *asic, unsigned int base,
+			   unsigned int function, u32 bits, u32 val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&asic->lock, flags);
+	val |= (asic3_read_register(asic, base + function) & ~bits);
+
+	asic3_write_register(asic, base + function, val);
+	spin_unlock_irqrestore(&asic->lock, flags);
+}
+
+#define asic3_get_gpio_a(asic, fn) \
+	asic3_get_gpio(asic, ASIC3_GPIO_A_Base, ASIC3_GPIO_##fn)
+#define asic3_get_gpio_b(asic, fn) \
+	asic3_get_gpio(asic, ASIC3_GPIO_B_Base, ASIC3_GPIO_##fn)
+#define asic3_get_gpio_c(asic, fn) \
+	asic3_get_gpio(asic, ASIC3_GPIO_C_Base, ASIC3_GPIO_##fn)
+#define asic3_get_gpio_d(asic, fn) \
+	asic3_get_gpio(asic, ASIC3_GPIO_D_Base, ASIC3_GPIO_##fn)
+
+#define asic3_set_gpio_a(asic, fn, bits, val) \
+	asic3_set_gpio(asic, ASIC3_GPIO_A_Base, ASIC3_GPIO_##fn, bits, val)
+#define asic3_set_gpio_b(asic, fn, bits, val) \
+	asic3_set_gpio(asic, ASIC3_GPIO_B_Base, ASIC3_GPIO_##fn, bits, val)
+#define asic3_set_gpio_c(asic, fn, bits, val) \
+	asic3_set_gpio(asic, ASIC3_GPIO_C_Base, ASIC3_GPIO_##fn, bits, val)
+#define asic3_set_gpio_d(asic, fn, bits, val) \
+	asic3_set_gpio(asic, ASIC3_GPIO_D_Base, ASIC3_GPIO_##fn, bits, val)
+
+#define asic3_set_gpio_banks(asic, fn, bits, pdata, field) 		  \
+	do {								  \
+	     asic3_set_gpio_a((asic), fn, (bits), (pdata)->gpio_a.field); \
+	     asic3_set_gpio_b((asic), fn, (bits), (pdata)->gpio_b.field); \
+	     asic3_set_gpio_c((asic), fn, (bits), (pdata)->gpio_c.field); \
+	     asic3_set_gpio_d((asic), fn, (bits), (pdata)->gpio_d.field); \
+	} while (0)
+
+int asic3_gpio_get_value(struct asic3 *asic, unsigned gpio)
+{
+	u32 mask = ASIC3_GPIO_bit(gpio);
+
+	switch (gpio >> 4) {
+	case ASIC3_GPIO_BANK_A:
+		return asic3_get_gpio_a(asic, Status) & mask;
+	case ASIC3_GPIO_BANK_B:
+		return asic3_get_gpio_b(asic, Status) & mask;
+	case ASIC3_GPIO_BANK_C:
+		return asic3_get_gpio_c(asic, Status) & mask;
+	case ASIC3_GPIO_BANK_D:
+		return asic3_get_gpio_d(asic, Status) & mask;
+	default:
+		printk(KERN_ERR "%s: invalid GPIO value 0x%x",
+		       __FUNCTION__, gpio);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(asic3_gpio_get_value);
+
+void asic3_gpio_set_value(struct asic3 *asic, unsigned gpio, int val)
+{
+	u32 mask = ASIC3_GPIO_bit(gpio);
+	u32 bitval = 0;
+	if (val)
+		bitval = mask;
+
+	switch (gpio >> 4) {
+	case ASIC3_GPIO_BANK_A:
+		asic3_set_gpio_a(asic, Out, mask, bitval);
+		return;
+	case ASIC3_GPIO_BANK_B:
+		asic3_set_gpio_b(asic, Out, mask, bitval);
+		return;
+	case ASIC3_GPIO_BANK_C:
+		asic3_set_gpio_c(asic, Out, mask, bitval);
+		return;
+	case ASIC3_GPIO_BANK_D:
+		asic3_set_gpio_d(asic, Out, mask, bitval);
+		return;
+	default:
+		printk(KERN_ERR "%s: invalid GPIO value 0x%x",
+		       __FUNCTION__, gpio);
+		return;
+	}
+}
+EXPORT_SYMBOL(asic3_gpio_set_value);
+
+static int asic3_gpio_probe(struct platform_device *pdev)
+{
+	struct asic3_platform_data *pdata = pdev->dev.platform_data;
+	struct asic3 *asic = platform_get_drvdata(pdev);
+
+	asic3_write_register(asic, ASIC3_GPIO_OFFSET(A, Mask), 0xffff);
+	asic3_write_register(asic, ASIC3_GPIO_OFFSET(B, Mask), 0xffff);
+	asic3_write_register(asic, ASIC3_GPIO_OFFSET(C, Mask), 0xffff);
+	asic3_write_register(asic, ASIC3_GPIO_OFFSET(D, Mask), 0xffff);
+
+	asic3_set_gpio_a(asic, SleepMask, 0xffff, 0xffff);
+	asic3_set_gpio_b(asic, SleepMask, 0xffff, 0xffff);
+	asic3_set_gpio_c(asic, SleepMask, 0xffff, 0xffff);
+	asic3_set_gpio_d(asic, SleepMask, 0xffff, 0xffff);
+
+	if (pdata) {
+		asic3_set_gpio_banks(asic, Out, 0xffff, pdata, init);
+		asic3_set_gpio_banks(asic, Direction, 0xffff, pdata, dir);
+		asic3_set_gpio_banks(asic, SleepMask, 0xffff, pdata,
+				     sleep_mask);
+		asic3_set_gpio_banks(asic, SleepOut, 0xffff, pdata, sleep_out);
+		asic3_set_gpio_banks(asic, BattFaultOut, 0xffff, pdata,
+				     batt_fault_out);
+		asic3_set_gpio_banks(asic, SleepConf, 0xffff, pdata,
+				     sleep_conf);
+		asic3_set_gpio_banks(asic, AltFunction, 0xffff, pdata,
+				     alt_function);
+	}
+
+	return 0;
+}
+
+static void asic3_gpio_remove(struct platform_device *pdev)
+{
+	return;
+}
+
+
+/* Core */
+static int asic3_probe(struct platform_device *pdev)
+{
+	struct asic3_platform_data *pdata = pdev->dev.platform_data;
+	struct asic3 *asic;
+	struct resource *mem;
+	unsigned long clksel;
+	int ret;
+
+	asic = kzalloc(sizeof(struct asic3), GFP_KERNEL);
+	if (!asic)
+		return -ENOMEM;
+
+	spin_lock_init(&asic->lock);
+	platform_set_drvdata(pdev, asic);
+	asic->dev = &pdev->dev;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "asic3: no MEM resource\n");
+		goto err_out_1;
+	}
+
+	asic->mapping = ioremap(mem->start, PAGE_SIZE);
+	if (!asic->mapping) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "asic3: couldn't ioremap\n");
+		goto err_out_1;
+	}
+
+	asic->irq_base = pdata->irq_base;
+
+	if (pdata && pdata->bus_shift)
+		asic->bus_shift = 2 - pdata->bus_shift;
+	else
+		asic->bus_shift = 0;
+
+	clksel = 0;
+	asic3_write_register(asic, ASIC3_OFFSET(CLOCK, SEL), clksel);
+
+	ret = asic3_irq_probe(pdev);
+	if (ret < 0) {
+		printk(KERN_ERR "asic3: couldn't probe IRQs\n");
+		goto err_out_2;
+	}
+	asic3_gpio_probe(pdev);
+
+	if (pdata->children) {
+		int i;
+		for (i = 0; i < pdata->n_children; i++) {
+			pdata->children[i]->dev.parent = &pdev->dev;
+			platform_device_register(pdata->children[i]);
+		}
+	}
+
+	printk(KERN_INFO "ASIC3 Core driver\n");
+
+	return 0;
+
+ err_out_2:
+	iounmap(asic->mapping);
+ err_out_1:
+	kfree(asic);
+
+	return ret;
+}
+
+static int asic3_remove(struct platform_device *pdev)
+{
+	struct asic3 *asic = platform_get_drvdata(pdev);
+
+	asic3_gpio_remove(pdev);
+	asic3_irq_remove(pdev);
+
+	asic3_write_register(asic, ASIC3_OFFSET(CLOCK, SEL), 0);
+
+	iounmap(asic->mapping);
+
+	kfree(asic);
+
+	return 0;
+}
+
+static void asic3_shutdown(struct platform_device *pdev)
+{
+}
+
+static struct platform_driver asic3_device_driver = {
+	.driver		= {
+		.name	= "asic3",
+	},
+	.probe		= asic3_probe,
+	.remove		= __devexit_p(asic3_remove),
+	.shutdown	= asic3_shutdown,
+};
+
+static int __init asic3_init(void)
+{
+	int retval = 0;
+	retval = platform_driver_register(&asic3_device_driver);
+	return retval;
+}
+
+subsys_initcall(asic3_init);
diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h
new file mode 100644
index 000000000000..4ab2162db13b
--- /dev/null
+++ b/include/linux/mfd/asic3.h
@@ -0,0 +1,497 @@
+/*
+ * include/linux/mfd/asic3.h
+ *
+ * Compaq ASIC3 headers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2001 Compaq Computer Corporation.
+ * Copyright 2007 OpendHand.
+ */
+
+#ifndef __ASIC3_H__
+#define __ASIC3_H__
+
+#include <linux/types.h>
+
+struct asic3 {
+	void __iomem *mapping;
+	unsigned int bus_shift;
+	unsigned int irq_nr;
+	unsigned int irq_base;
+	spinlock_t lock;
+	u16 irq_bothedge[4];
+	struct device *dev;
+};
+
+struct asic3_platform_data {
+	struct {
+		u32 dir;
+		u32 init;
+		u32 sleep_mask;
+		u32 sleep_out;
+		u32 batt_fault_out;
+		u32 sleep_conf;
+		u32 alt_function;
+	} gpio_a, gpio_b, gpio_c, gpio_d;
+
+	unsigned int bus_shift;
+
+	unsigned int irq_base;
+
+	struct platform_device **children;
+	unsigned int n_children;
+};
+
+int asic3_gpio_get_value(struct asic3 *asic, unsigned gpio);
+void asic3_gpio_set_value(struct asic3 *asic, unsigned gpio, int val);
+
+#define ASIC3_NUM_GPIO_BANKS	4
+#define ASIC3_GPIOS_PER_BANK	16
+#define ASIC3_NUM_GPIOS		64
+#define ASIC3_NR_IRQS		ASIC3_NUM_GPIOS + 6
+
+#define ASIC3_GPIO_BANK_A	0
+#define ASIC3_GPIO_BANK_B	1
+#define ASIC3_GPIO_BANK_C	2
+#define ASIC3_GPIO_BANK_D	3
+
+#define ASIC3_GPIO(bank, gpio) \
+	((ASIC3_GPIOS_PER_BANK * ASIC3_GPIO_BANK_##bank) + (gpio))
+#define ASIC3_GPIO_bit(gpio) (1 << (gpio & 0xf))
+/* All offsets below are specified with this address bus shift */
+#define ASIC3_DEFAULT_ADDR_SHIFT 2
+
+#define ASIC3_OFFSET(base, reg) (ASIC3_##base##_Base + ASIC3_##base##_##reg)
+#define ASIC3_GPIO_OFFSET(base, reg) \
+	(ASIC3_GPIO_##base##_Base + ASIC3_GPIO_##reg)
+
+#define ASIC3_GPIO_A_Base      0x0000
+#define ASIC3_GPIO_B_Base      0x0100
+#define ASIC3_GPIO_C_Base      0x0200
+#define ASIC3_GPIO_D_Base      0x0300
+
+#define ASIC3_GPIO_Mask          0x00    /* R/W 0:don't mask */
+#define ASIC3_GPIO_Direction     0x04    /* R/W 0:input */
+#define ASIC3_GPIO_Out           0x08    /* R/W 0:output low */
+#define ASIC3_GPIO_TriggerType   0x0c    /* R/W 0:level */
+#define ASIC3_GPIO_EdgeTrigger   0x10    /* R/W 0:falling */
+#define ASIC3_GPIO_LevelTrigger  0x14    /* R/W 0:low level detect */
+#define ASIC3_GPIO_SleepMask     0x18    /* R/W 0:don't mask in sleep mode */
+#define ASIC3_GPIO_SleepOut      0x1c    /* R/W level 0:low in sleep mode */
+#define ASIC3_GPIO_BattFaultOut  0x20    /* R/W level 0:low in batt_fault */
+#define ASIC3_GPIO_IntStatus     0x24    /* R/W 0:none, 1:detect */
+#define ASIC3_GPIO_AltFunction   0x28	 /* R/W 1:LED register control */
+#define ASIC3_GPIO_SleepConf     0x2c    /*
+					  * R/W bit 1: autosleep
+					  * 0: disable gposlpout in normal mode,
+					  * enable gposlpout in sleep mode.
+					  */
+#define ASIC3_GPIO_Status        0x30    /* R   Pin status */
+
+#define ASIC3_SPI_Base		      0x0400
+#define ASIC3_SPI_Control               0x0000
+#define ASIC3_SPI_TxData                0x0004
+#define ASIC3_SPI_RxData                0x0008
+#define ASIC3_SPI_Int                   0x000c
+#define ASIC3_SPI_Status                0x0010
+
+#define SPI_CONTROL_SPR(clk)      ((clk) & 0x0f)  /* Clock rate */
+
+#define ASIC3_PWM_0_Base                0x0500
+#define ASIC3_PWM_1_Base                0x0600
+#define ASIC3_PWM_TimeBase              0x0000
+#define ASIC3_PWM_PeriodTime            0x0004
+#define ASIC3_PWM_DutyTime              0x0008
+
+#define PWM_TIMEBASE_VALUE(x)    ((x)&0xf)   /* Low 4 bits sets time base */
+#define PWM_TIMEBASE_ENABLE     (1 << 4)   /* Enable clock */
+
+#define ASIC3_LED_0_Base                0x0700
+#define ASIC3_LED_1_Base                0x0800
+#define ASIC3_LED_2_Base 		      0x0900
+#define ASIC3_LED_TimeBase              0x0000    /* R/W  7 bits */
+#define ASIC3_LED_PeriodTime            0x0004    /* R/W 12 bits */
+#define ASIC3_LED_DutyTime              0x0008    /* R/W 12 bits */
+#define ASIC3_LED_AutoStopCount         0x000c    /* R/W 16 bits */
+
+/* LED TimeBase bits - match ASIC2 */
+#define LED_TBS		0x0f /* Low 4 bits sets time base, max = 13 */
+			     /* Note: max = 5 on hx4700	*/
+			     /* 0: maximum time base */
+			     /* 1: maximum time base / 2 */
+			     /* n: maximum time base / 2^n */
+
+#define LED_EN		(1 << 4) /* LED ON/OFF 0:off, 1:on */
+#define LED_AUTOSTOP	(1 << 5) /* LED ON/OFF auto stop 0:disable, 1:enable */
+#define LED_ALWAYS	(1 << 6) /* LED Interrupt Mask 0:No mask, 1:mask */
+
+#define ASIC3_CLOCK_Base		0x0A00
+#define ASIC3_CLOCK_CDEX           0x00
+#define ASIC3_CLOCK_SEL            0x04
+
+#define CLOCK_CDEX_SOURCE       (1 << 0)  /* 2 bits */
+#define CLOCK_CDEX_SOURCE0      (1 << 0)
+#define CLOCK_CDEX_SOURCE1      (1 << 1)
+#define CLOCK_CDEX_SPI          (1 << 2)
+#define CLOCK_CDEX_OWM          (1 << 3)
+#define CLOCK_CDEX_PWM0         (1 << 4)
+#define CLOCK_CDEX_PWM1         (1 << 5)
+#define CLOCK_CDEX_LED0         (1 << 6)
+#define CLOCK_CDEX_LED1         (1 << 7)
+#define CLOCK_CDEX_LED2         (1 << 8)
+
+/* Clocks settings: 1 for 24.576 MHz, 0 for 12.288Mhz */
+#define CLOCK_CDEX_SD_HOST      (1 << 9)   /* R/W: SD host clock source */
+#define CLOCK_CDEX_SD_BUS       (1 << 10)  /* R/W: SD bus clock source ctrl */
+#define CLOCK_CDEX_SMBUS        (1 << 11)
+#define CLOCK_CDEX_CONTROL_CX   (1 << 12)
+
+#define CLOCK_CDEX_EX0          (1 << 13)  /* R/W: 32.768 kHz crystal */
+#define CLOCK_CDEX_EX1          (1 << 14)  /* R/W: 24.576 MHz crystal */
+
+#define CLOCK_SEL_SD_HCLK_SEL   (1 << 0)   /* R/W: SDIO host clock select */
+#define CLOCK_SEL_SD_BCLK_SEL   (1 << 1)   /* R/W: SDIO bus clock select */
+
+/* R/W: INT clock source control (32.768 kHz) */
+#define CLOCK_SEL_CX            (1 << 2)
+
+
+#define ASIC3_INTR_Base		0x0B00
+
+#define ASIC3_INTR_IntMask       0x00  /* Interrupt mask control */
+#define ASIC3_INTR_PIntStat      0x04  /* Peripheral interrupt status */
+#define ASIC3_INTR_IntCPS        0x08  /* Interrupt timer clock pre-scale */
+#define ASIC3_INTR_IntTBS        0x0c  /* Interrupt timer set */
+
+#define ASIC3_INTMASK_GINTMASK    (1 << 0)  /* Global INTs mask 1:enable */
+#define ASIC3_INTMASK_GINTEL      (1 << 1)  /* 1: rising edge, 0: hi level */
+#define ASIC3_INTMASK_MASK0       (1 << 2)
+#define ASIC3_INTMASK_MASK1       (1 << 3)
+#define ASIC3_INTMASK_MASK2       (1 << 4)
+#define ASIC3_INTMASK_MASK3       (1 << 5)
+#define ASIC3_INTMASK_MASK4       (1 << 6)
+#define ASIC3_INTMASK_MASK5       (1 << 7)
+
+#define ASIC3_INTR_PERIPHERAL_A   (1 << 0)
+#define ASIC3_INTR_PERIPHERAL_B   (1 << 1)
+#define ASIC3_INTR_PERIPHERAL_C   (1 << 2)
+#define ASIC3_INTR_PERIPHERAL_D   (1 << 3)
+#define ASIC3_INTR_LED0           (1 << 4)
+#define ASIC3_INTR_LED1           (1 << 5)
+#define ASIC3_INTR_LED2           (1 << 6)
+#define ASIC3_INTR_SPI            (1 << 7)
+#define ASIC3_INTR_SMBUS          (1 << 8)
+#define ASIC3_INTR_OWM            (1 << 9)
+
+#define ASIC3_INTR_CPS(x)         ((x)&0x0f)    /* 4 bits, max 14 */
+#define ASIC3_INTR_CPS_SET        (1 << 4)    /* Time base enable */
+
+
+/* Basic control of the SD ASIC */
+#define ASIC3_SDHWCTRL_Base	0x0E00
+#define ASIC3_SDHWCTRL_SDConf    0x00
+
+#define ASIC3_SDHWCTRL_SUSPEND    (1 << 0)  /* 1=suspend all SD operations */
+#define ASIC3_SDHWCTRL_CLKSEL     (1 << 1)  /* 1=SDICK, 0=HCLK */
+#define ASIC3_SDHWCTRL_PCLR       (1 << 2)  /* All registers of SDIO cleared */
+#define ASIC3_SDHWCTRL_LEVCD      (1 << 3)  /* SD card detection: 0:low */
+
+/* SD card write protection: 0=high */
+#define ASIC3_SDHWCTRL_LEVWP      (1 << 4)
+#define ASIC3_SDHWCTRL_SDLED      (1 << 5)  /* SD card LED signal 0=disable */
+
+/* SD card power supply ctrl 1=enable */
+#define ASIC3_SDHWCTRL_SDPWR      (1 << 6)
+
+#define ASIC3_EXTCF_Base		0x1100
+
+#define ASIC3_EXTCF_Select         0x00
+#define ASIC3_EXTCF_Reset          0x04
+
+#define ASIC3_EXTCF_SMOD0	         (1 << 0)  /* slot number of mode 0 */
+#define ASIC3_EXTCF_SMOD1	         (1 << 1)  /* slot number of mode 1 */
+#define ASIC3_EXTCF_SMOD2	         (1 << 2)  /* slot number of mode 2 */
+#define ASIC3_EXTCF_OWM_EN	         (1 << 4)  /* enable onewire module */
+#define ASIC3_EXTCF_OWM_SMB	         (1 << 5)  /* OWM bus selection */
+#define ASIC3_EXTCF_OWM_RESET            (1 << 6)  /* ?? used by OWM and CF */
+#define ASIC3_EXTCF_CF0_SLEEP_MODE       (1 << 7)  /* CF0 sleep state */
+#define ASIC3_EXTCF_CF1_SLEEP_MODE       (1 << 8)  /* CF1 sleep state */
+#define ASIC3_EXTCF_CF0_PWAIT_EN         (1 << 10) /* CF0 PWAIT_n control */
+#define ASIC3_EXTCF_CF1_PWAIT_EN         (1 << 11) /* CF1 PWAIT_n control */
+#define ASIC3_EXTCF_CF0_BUF_EN           (1 << 12) /* CF0 buffer control */
+#define ASIC3_EXTCF_CF1_BUF_EN           (1 << 13) /* CF1 buffer control */
+#define ASIC3_EXTCF_SD_MEM_ENABLE        (1 << 14)
+#define ASIC3_EXTCF_CF_SLEEP             (1 << 15) /* CF sleep mode control */
+
+/*********************************************
+ *  The Onewire interface registers
+ *
+ *  OWM_CMD
+ *  OWM_DAT
+ *  OWM_INTR
+ *  OWM_INTEN
+ *  OWM_CLKDIV
+ *
+ *********************************************/
+
+#define ASIC3_OWM_Base		0xC00
+
+#define ASIC3_OWM_CMD         0x00
+#define ASIC3_OWM_DAT         0x04
+#define ASIC3_OWM_INTR        0x08
+#define ASIC3_OWM_INTEN       0x0C
+#define ASIC3_OWM_CLKDIV      0x10
+
+#define ASIC3_OWM_CMD_ONEWR         (1 << 0)
+#define ASIC3_OWM_CMD_SRA           (1 << 1)
+#define ASIC3_OWM_CMD_DQO           (1 << 2)
+#define ASIC3_OWM_CMD_DQI           (1 << 3)
+
+#define ASIC3_OWM_INTR_PD          (1 << 0)
+#define ASIC3_OWM_INTR_PDR         (1 << 1)
+#define ASIC3_OWM_INTR_TBE         (1 << 2)
+#define ASIC3_OWM_INTR_TEMP        (1 << 3)
+#define ASIC3_OWM_INTR_RBF         (1 << 4)
+
+#define ASIC3_OWM_INTEN_EPD        (1 << 0)
+#define ASIC3_OWM_INTEN_IAS        (1 << 1)
+#define ASIC3_OWM_INTEN_ETBE       (1 << 2)
+#define ASIC3_OWM_INTEN_ETMT       (1 << 3)
+#define ASIC3_OWM_INTEN_ERBF       (1 << 4)
+
+#define ASIC3_OWM_CLKDIV_PRE       (3 << 0) /* two bits wide at bit 0 */
+#define ASIC3_OWM_CLKDIV_DIV       (7 << 2) /* 3 bits wide at bit 2 */
+
+
+/*****************************************************************************
+ *  The SD configuration registers are at a completely different location
+ *  in memory.  They are divided into three sets of registers:
+ *
+ *  SD_CONFIG         Core configuration register
+ *  SD_CTRL           Control registers for SD operations
+ *  SDIO_CTRL         Control registers for SDIO operations
+ *
+ *****************************************************************************/
+#define ASIC3_SD_CONFIG_Base            0x0400 /* Assumes 32 bit addressing */
+
+#define ASIC3_SD_CONFIG_Command           0x08   /* R/W: Command */
+
+/* [0:8] SD Control Register Base Address */
+#define ASIC3_SD_CONFIG_Addr0             0x20
+
+/* [9:31] SD Control Register Base Address */
+#define ASIC3_SD_CONFIG_Addr1             0x24
+
+/* R/O: interrupt assigned to pin */
+#define ASIC3_SD_CONFIG_IntPin            0x78
+
+/*
+ * Set to 0x1f to clock SD controller, 0 otherwise.
+ * At 0x82 - Gated Clock Ctrl
+ */
+#define ASIC3_SD_CONFIG_ClkStop           0x80
+
+/* Control clock of SD controller */
+#define ASIC3_SD_CONFIG_ClockMode         0x84
+#define ASIC3_SD_CONFIG_SDHC_PinStatus    0x88   /* R/0: SD pins status */
+#define ASIC3_SD_CONFIG_SDHC_Power1       0x90   /* Power1 - manual pwr ctrl */
+
+/* auto power up after card inserted */
+#define ASIC3_SD_CONFIG_SDHC_Power2       0x92
+
+/* auto power down when card removed */
+#define ASIC3_SD_CONFIG_SDHC_Power3       0x94
+#define ASIC3_SD_CONFIG_SDHC_CardDetect   0x98
+#define ASIC3_SD_CONFIG_SDHC_Slot         0xA0   /* R/O: support slot number */
+#define ASIC3_SD_CONFIG_SDHC_ExtGateClk1  0x1E0  /* Not used */
+#define ASIC3_SD_CONFIG_SDHC_ExtGateClk2  0x1E2  /* Not used*/
+
+/* GPIO Output Reg. , at 0x1EA - GPIO Output Enable Reg. */
+#define ASIC3_SD_CONFIG_SDHC_GPIO_OutAndEnable  0x1E8
+#define ASIC3_SD_CONFIG_SDHC_GPIO_Status  0x1EC  /* GPIO Status Reg. */
+
+/* Bit 1: double buffer/single buffer */
+#define ASIC3_SD_CONFIG_SDHC_ExtGateClk3  0x1F0
+
+/* Memory access enable (set to 1 to access SD Controller) */
+#define SD_CONFIG_COMMAND_MAE                (1<<1)
+
+#define SD_CONFIG_CLK_ENABLE_ALL             0x1f
+
+#define SD_CONFIG_POWER1_PC_33V              0x0200    /* Set for 3.3 volts */
+#define SD_CONFIG_POWER1_PC_OFF              0x0000    /* Turn off power */
+
+ /* two bits - number of cycles for card detection */
+#define SD_CONFIG_CARDDETECTMODE_CLK           ((x) & 0x3)
+
+
+#define ASIC3_SD_CTRL_Base            0x1000
+
+#define ASIC3_SD_CTRL_Cmd                  0x00
+#define ASIC3_SD_CTRL_Arg0                 0x08
+#define ASIC3_SD_CTRL_Arg1                 0x0C
+#define ASIC3_SD_CTRL_StopInternal         0x10
+#define ASIC3_SD_CTRL_TransferSectorCount  0x14
+#define ASIC3_SD_CTRL_Response0            0x18
+#define ASIC3_SD_CTRL_Response1            0x1C
+#define ASIC3_SD_CTRL_Response2            0x20
+#define ASIC3_SD_CTRL_Response3            0x24
+#define ASIC3_SD_CTRL_Response4            0x28
+#define ASIC3_SD_CTRL_Response5            0x2C
+#define ASIC3_SD_CTRL_Response6            0x30
+#define ASIC3_SD_CTRL_Response7            0x34
+#define ASIC3_SD_CTRL_CardStatus           0x38
+#define ASIC3_SD_CTRL_BufferCtrl           0x3C
+#define ASIC3_SD_CTRL_IntMaskCard          0x40
+#define ASIC3_SD_CTRL_IntMaskBuffer        0x44
+#define ASIC3_SD_CTRL_CardClockCtrl        0x48
+#define ASIC3_SD_CTRL_MemCardXferDataLen   0x4C
+#define ASIC3_SD_CTRL_MemCardOptionSetup   0x50
+#define ASIC3_SD_CTRL_ErrorStatus0         0x58
+#define ASIC3_SD_CTRL_ErrorStatus1         0x5C
+#define ASIC3_SD_CTRL_DataPort             0x60
+#define ASIC3_SD_CTRL_TransactionCtrl      0x68
+#define ASIC3_SD_CTRL_SoftwareReset        0x1C0
+
+#define SD_CTRL_SOFTWARE_RESET_CLEAR            (1<<0)
+
+#define SD_CTRL_TRANSACTIONCONTROL_SET          (1<<8)
+
+#define SD_CTRL_CARDCLOCKCONTROL_FOR_SD_CARD    (1<<15)
+#define SD_CTRL_CARDCLOCKCONTROL_ENABLE_CLOCK   (1<<8)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_512    (1<<7)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_256    (1<<6)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_128    (1<<5)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_64     (1<<4)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_32     (1<<3)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_16     (1<<2)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_8      (1<<1)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_4      (1<<0)
+#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_2      (0<<0)
+
+#define MEM_CARD_OPTION_REQUIRED                   0x000e
+#define MEM_CARD_OPTION_DATA_RESPONSE_TIMEOUT(x)   (((x) & 0x0f) << 4)
+#define MEM_CARD_OPTION_C2_MODULE_NOT_PRESENT      (1<<14)
+#define MEM_CARD_OPTION_DATA_XFR_WIDTH_1           (1<<15)
+#define MEM_CARD_OPTION_DATA_XFR_WIDTH_4           0
+
+#define SD_CTRL_COMMAND_INDEX(x)                   ((x) & 0x3f)
+#define SD_CTRL_COMMAND_TYPE_CMD                   (0 << 6)
+#define SD_CTRL_COMMAND_TYPE_ACMD                  (1 << 6)
+#define SD_CTRL_COMMAND_TYPE_AUTHENTICATION        (2 << 6)
+#define SD_CTRL_COMMAND_RESPONSE_TYPE_NORMAL       (0 << 8)
+#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R1       (4 << 8)
+#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R1B      (5 << 8)
+#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R2       (6 << 8)
+#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R3       (7 << 8)
+#define SD_CTRL_COMMAND_DATA_PRESENT               (1 << 11)
+#define SD_CTRL_COMMAND_TRANSFER_READ              (1 << 12)
+#define SD_CTRL_COMMAND_TRANSFER_WRITE             (0 << 12)
+#define SD_CTRL_COMMAND_MULTI_BLOCK                (1 << 13)
+#define SD_CTRL_COMMAND_SECURITY_CMD               (1 << 14)
+
+#define SD_CTRL_STOP_INTERNAL_ISSSUE_CMD12         (1 << 0)
+#define SD_CTRL_STOP_INTERNAL_AUTO_ISSUE_CMD12     (1 << 8)
+
+#define SD_CTRL_CARDSTATUS_RESPONSE_END            (1 << 0)
+#define SD_CTRL_CARDSTATUS_RW_END                  (1 << 2)
+#define SD_CTRL_CARDSTATUS_CARD_REMOVED_0          (1 << 3)
+#define SD_CTRL_CARDSTATUS_CARD_INSERTED_0         (1 << 4)
+#define SD_CTRL_CARDSTATUS_SIGNAL_STATE_PRESENT_0  (1 << 5)
+#define SD_CTRL_CARDSTATUS_WRITE_PROTECT           (1 << 7)
+#define SD_CTRL_CARDSTATUS_CARD_REMOVED_3          (1 << 8)
+#define SD_CTRL_CARDSTATUS_CARD_INSERTED_3         (1 << 9)
+#define SD_CTRL_CARDSTATUS_SIGNAL_STATE_PRESENT_3  (1 << 10)
+
+#define SD_CTRL_BUFFERSTATUS_CMD_INDEX_ERROR       (1 << 0)
+#define SD_CTRL_BUFFERSTATUS_CRC_ERROR             (1 << 1)
+#define SD_CTRL_BUFFERSTATUS_STOP_BIT_END_ERROR    (1 << 2)
+#define SD_CTRL_BUFFERSTATUS_DATA_TIMEOUT          (1 << 3)
+#define SD_CTRL_BUFFERSTATUS_BUFFER_OVERFLOW       (1 << 4)
+#define SD_CTRL_BUFFERSTATUS_BUFFER_UNDERFLOW      (1 << 5)
+#define SD_CTRL_BUFFERSTATUS_CMD_TIMEOUT           (1 << 6)
+#define SD_CTRL_BUFFERSTATUS_UNK7                  (1 << 7)
+#define SD_CTRL_BUFFERSTATUS_BUFFER_READ_ENABLE    (1 << 8)
+#define SD_CTRL_BUFFERSTATUS_BUFFER_WRITE_ENABLE   (1 << 9)
+#define SD_CTRL_BUFFERSTATUS_ILLEGAL_FUNCTION      (1 << 13)
+#define SD_CTRL_BUFFERSTATUS_CMD_BUSY              (1 << 14)
+#define SD_CTRL_BUFFERSTATUS_ILLEGAL_ACCESS        (1 << 15)
+
+#define SD_CTRL_INTMASKCARD_RESPONSE_END           (1 << 0)
+#define SD_CTRL_INTMASKCARD_RW_END                 (1 << 2)
+#define SD_CTRL_INTMASKCARD_CARD_REMOVED_0         (1 << 3)
+#define SD_CTRL_INTMASKCARD_CARD_INSERTED_0        (1 << 4)
+#define SD_CTRL_INTMASKCARD_SIGNAL_STATE_PRESENT_0 (1 << 5)
+#define SD_CTRL_INTMASKCARD_UNK6                   (1 << 6)
+#define SD_CTRL_INTMASKCARD_WRITE_PROTECT          (1 << 7)
+#define SD_CTRL_INTMASKCARD_CARD_REMOVED_3         (1 << 8)
+#define SD_CTRL_INTMASKCARD_CARD_INSERTED_3        (1 << 9)
+#define SD_CTRL_INTMASKCARD_SIGNAL_STATE_PRESENT_3 (1 << 10)
+
+#define SD_CTRL_INTMASKBUFFER_CMD_INDEX_ERROR      (1 << 0)
+#define SD_CTRL_INTMASKBUFFER_CRC_ERROR            (1 << 1)
+#define SD_CTRL_INTMASKBUFFER_STOP_BIT_END_ERROR   (1 << 2)
+#define SD_CTRL_INTMASKBUFFER_DATA_TIMEOUT         (1 << 3)
+#define SD_CTRL_INTMASKBUFFER_BUFFER_OVERFLOW      (1 << 4)
+#define SD_CTRL_INTMASKBUFFER_BUFFER_UNDERFLOW     (1 << 5)
+#define SD_CTRL_INTMASKBUFFER_CMD_TIMEOUT          (1 << 6)
+#define SD_CTRL_INTMASKBUFFER_UNK7                 (1 << 7)
+#define SD_CTRL_INTMASKBUFFER_BUFFER_READ_ENABLE   (1 << 8)
+#define SD_CTRL_INTMASKBUFFER_BUFFER_WRITE_ENABLE  (1 << 9)
+#define SD_CTRL_INTMASKBUFFER_ILLEGAL_FUNCTION     (1 << 13)
+#define SD_CTRL_INTMASKBUFFER_CMD_BUSY             (1 << 14)
+#define SD_CTRL_INTMASKBUFFER_ILLEGAL_ACCESS       (1 << 15)
+
+#define SD_CTRL_DETAIL0_RESPONSE_CMD_ERROR                   (1 << 0)
+#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_RESPONSE_NON_CMD12 (1 << 2)
+#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_RESPONSE_CMD12     (1 << 3)
+#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_READ_DATA          (1 << 4)
+#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_WRITE_CRC_STATUS   (1 << 5)
+#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_RESPONSE_NON_CMD12     (1 << 8)
+#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_RESPONSE_CMD12         (1 << 9)
+#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_READ_DATA              (1 << 10)
+#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_WRITE_CMD              (1 << 11)
+
+#define SD_CTRL_DETAIL1_NO_CMD_RESPONSE                      (1 << 0)
+#define SD_CTRL_DETAIL1_TIMEOUT_READ_DATA                    (1 << 4)
+#define SD_CTRL_DETAIL1_TIMEOUT_CRS_STATUS                   (1 << 5)
+#define SD_CTRL_DETAIL1_TIMEOUT_CRC_BUSY                     (1 << 6)
+
+#define ASIC3_SDIO_CTRL_Base          0x1200
+
+#define ASIC3_SDIO_CTRL_Cmd                  0x00
+#define ASIC3_SDIO_CTRL_CardPortSel          0x04
+#define ASIC3_SDIO_CTRL_Arg0                 0x08
+#define ASIC3_SDIO_CTRL_Arg1                 0x0C
+#define ASIC3_SDIO_CTRL_TransferBlockCount   0x14
+#define ASIC3_SDIO_CTRL_Response0            0x18
+#define ASIC3_SDIO_CTRL_Response1            0x1C
+#define ASIC3_SDIO_CTRL_Response2            0x20
+#define ASIC3_SDIO_CTRL_Response3            0x24
+#define ASIC3_SDIO_CTRL_Response4            0x28
+#define ASIC3_SDIO_CTRL_Response5            0x2C
+#define ASIC3_SDIO_CTRL_Response6            0x30
+#define ASIC3_SDIO_CTRL_Response7            0x34
+#define ASIC3_SDIO_CTRL_CardStatus           0x38
+#define ASIC3_SDIO_CTRL_BufferCtrl           0x3C
+#define ASIC3_SDIO_CTRL_IntMaskCard          0x40
+#define ASIC3_SDIO_CTRL_IntMaskBuffer        0x44
+#define ASIC3_SDIO_CTRL_CardXferDataLen      0x4C
+#define ASIC3_SDIO_CTRL_CardOptionSetup      0x50
+#define ASIC3_SDIO_CTRL_ErrorStatus0         0x54
+#define ASIC3_SDIO_CTRL_ErrorStatus1         0x58
+#define ASIC3_SDIO_CTRL_DataPort             0x60
+#define ASIC3_SDIO_CTRL_TransactionCtrl      0x68
+#define ASIC3_SDIO_CTRL_CardIntCtrl          0x6C
+#define ASIC3_SDIO_CTRL_ClocknWaitCtrl       0x70
+#define ASIC3_SDIO_CTRL_HostInformation      0x74
+#define ASIC3_SDIO_CTRL_ErrorCtrl            0x78
+#define ASIC3_SDIO_CTRL_LEDCtrl              0x7C
+#define ASIC3_SDIO_CTRL_SoftwareReset        0x1C0
+
+#define ASIC3_MAP_SIZE	     		     0x2000
+
+#endif /* __ASIC3_H__ */
-- 
cgit 


From 72a7fe3967dbf86cb34e24fbf1d957fe24d2f246 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Thu, 7 Feb 2008 00:15:17 -0800
Subject: Introduce flags for reserve_bootmem()

This patchset adds a flags variable to reserve_bootmem() and uses the
BOOTMEM_EXCLUSIVE flag in crashkernel reservation code to detect collisions
between crashkernel area and already used memory.

This patch:

Change the reserve_bootmem() function to accept a new flag BOOTMEM_EXCLUSIVE.
If that flag is set, the function returns with -EBUSY if the memory already
has been reserved in the past.  This is to avoid conflicts.

Because that code runs before SMP initialisation, there's no race condition
inside reserve_bootmem_core().

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix powerpc build]
Signed-off-by: Bernhard Walle <bwalle@suse.de>
Cc: <linux-arch@vger.kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_irongate.c |  3 ++-
 arch/alpha/kernel/setup.c         |  5 +++--
 arch/alpha/mm/numa.c              |  5 +++--
 arch/arm/mm/init.c                |  4 ++--
 arch/arm/mm/mmu.c                 | 17 +++++++++++------
 arch/arm/mm/nommu.c               |  9 ++++++---
 arch/arm/plat-omap/fb.c           |  2 +-
 arch/avr32/kernel/setup.c         |  6 ++++--
 arch/blackfin/kernel/setup.c      |  2 +-
 arch/cris/kernel/setup.c          |  2 +-
 arch/frv/kernel/setup.c           | 16 ++++++++++------
 arch/h8300/kernel/setup.c         |  2 +-
 arch/ia64/mm/contig.c             |  2 +-
 arch/ia64/mm/discontig.c          |  4 ++--
 arch/m32r/kernel/setup.c          | 11 +++++++----
 arch/m32r/mm/discontig.c          |  5 +++--
 arch/m68k/atari/stram.c           |  2 +-
 arch/m68k/kernel/setup.c          |  3 ++-
 arch/m68knommu/kernel/setup.c     |  2 +-
 arch/mips/kernel/setup.c          |  4 ++--
 arch/mips/sgi-ip27/ip27-memory.c  |  3 ++-
 arch/parisc/mm/init.c             | 14 +++++++++-----
 arch/powerpc/mm/mem.c             |  8 +++++---
 arch/powerpc/mm/numa.c            |  2 +-
 arch/s390/kernel/setup.c          | 11 +++++++----
 arch/sh/kernel/setup.c            | 10 ++++++----
 arch/sh/mm/numa.c                 |  4 ++--
 arch/sparc/mm/init.c              |  6 +++---
 arch/sparc64/mm/init.c            |  8 ++++----
 arch/v850/kernel/anna.c           |  3 ++-
 arch/v850/kernel/as85ep1.c        |  3 ++-
 arch/v850/kernel/rte_ma1_cb.c     |  6 ++++--
 arch/v850/kernel/setup.c          | 12 ++++++++----
 arch/x86/kernel/mpparse_32.c      |  6 ++++--
 arch/x86/kernel/setup_32.c        | 18 ++++++++++--------
 arch/x86/kernel/setup_64.c        |  5 +++--
 arch/x86/mm/discontig_32.c        |  3 ++-
 arch/x86/mm/init_64.c             |  4 ++--
 arch/x86/mm/numa_64.c             |  5 +++--
 arch/x86/mm/srat_64.c             |  3 ++-
 include/asm-x86/mmzone_32.h       |  4 ++--
 include/linux/bootmem.h           | 17 +++++++++++++++--
 mm/bootmem.c                      | 27 +++++++++++++++++++++------
 43 files changed, 183 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index e4a0bcf1d28b..a872078497be 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -241,7 +241,8 @@ albacore_init_arch(void)
 				       size / 1024);
 		}
 #endif
-		reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop - pci_mem);
+		reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
+				pci_mem, BOOTMEM_DEFAULT);
 		printk("irongate_init_arch: temporarily reserving "
 			"region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
 	}
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index beff6297f788..74c346625658 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -428,7 +428,8 @@ setup_memory(void *kernel_end)
 	}
 
 	/* Reserve the bootmap memory.  */
-	reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size);
+	reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size,
+			BOOTMEM_DEFAULT);
 	printk("reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -446,7 +447,7 @@ setup_memory(void *kernel_end)
 				       phys_to_virt(PFN_PHYS(max_low_pfn)));
 		} else {
 			reserve_bootmem(virt_to_phys((void *)initrd_start),
-					INITRD_SIZE);
+					INITRD_SIZE, BOOTMEM_DEFAULT);
 		}
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index e3e3806a6f25..10ab7833e83c 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -242,7 +242,8 @@ setup_memory_node(int nid, void *kernel_end)
 	}
 
 	/* Reserve the bootmap memory.  */
-	reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start), bootmap_size);
+	reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start),
+			bootmap_size, BOOTMEM_DEFAULT);
 	printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
 
 	node_set_online(nid);
@@ -281,7 +282,7 @@ setup_memory(void *kernel_end)
 			nid = kvaddr_to_nid(initrd_start);
 			reserve_bootmem_node(NODE_DATA(nid),
 					     virt_to_phys((void *)initrd_start),
-					     INITRD_SIZE);
+					     INITRD_SIZE, BOOTMEM_DEFAULT);
 		}
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index c0ad7c0fbae0..ec00f26bffa4 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -239,7 +239,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	 * Reserve the bootmem bitmap for this node.
 	 */
 	reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
-			     boot_pages << PAGE_SHIFT);
+			     boot_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/*
@@ -247,7 +247,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	 */
 	if (node == initrd_node) {
 		reserve_bootmem_node(pgdat, phys_initrd_start,
-				     phys_initrd_size);
+				     phys_initrd_size, BOOTMEM_DEFAULT);
 		initrd_start = __phys_to_virt(phys_initrd_start);
 		initrd_end = initrd_start + phys_initrd_size;
 	}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5d61ee3d4a1..d41a75ed3dce 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -605,9 +605,11 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * Note that this can only be in node 0.
 	 */
 #ifdef CONFIG_XIP_KERNEL
-	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
+	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
+			BOOTMEM_DEFAULT);
 #else
-	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
+			BOOTMEM_DEFAULT);
 #endif
 
 	/*
@@ -615,7 +617,7 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * and can only be in node 0.
 	 */
 	reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
-			     PTRS_PER_PGD * sizeof(pgd_t));
+			     PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);
 
 	/*
 	 * Hmm... This should go elsewhere, but we really really need to
@@ -638,8 +640,10 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	/* H1940 and RX3715 need to reserve this for suspend */
 
 	if (machine_is_h1940() || machine_is_rx3715()) {
-		reserve_bootmem_node(pgdat, 0x30003000, 0x1000);
-		reserve_bootmem_node(pgdat, 0x30081000, 0x1000);
+		reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
+				BOOTMEM_DEFAULT);
+		reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
+				BOOTMEM_DEFAULT);
 	}
 
 #ifdef CONFIG_SA1111
@@ -650,7 +654,8 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
 #endif
 	if (res_size)
-		reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
+		reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
+				BOOTMEM_DEFAULT);
 }
 
 /*
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 8cd3a60954f0..63c62fdea521 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -27,9 +27,11 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * Note that this can only be in node 0.
 	 */
 #ifdef CONFIG_XIP_KERNEL
-	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
+	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
+			BOOTMEM_DEFAULT);
 #else
-	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
+			BOOTMEM_DEFAULT);
 #endif
 
 	/*
@@ -37,7 +39,8 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * some architectures which the DRAM is the exception vector to trap,
 	 * alloc_page breaks with error, although it is not NULL, but "0."
 	 */
-	reserve_bootmem_node(pgdat, CONFIG_VECTORS_BASE, PAGE_SIZE);
+	reserve_bootmem_node(pgdat, CONFIG_VECTORS_BASE, PAGE_SIZE,
+			BOOTMEM_DEFAULT);
 }
 
 /*
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index ee40c1a0b83d..7854f19b77cf 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -207,7 +207,7 @@ void __init omapfb_reserve_sdram(void)
 			return;
 		}
 		if (rg.paddr)
-			reserve_bootmem(rg.paddr, rg.size);
+			reserve_bootmem(rg.paddr, rg.size, BOOTMEM_DEFAULT);
 		reserved += rg.size;
 		omapfb_config.mem_desc.region[i] = rg;
 		configured_regions++;
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 4b4c1884e1c5..e66a07a928cd 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -489,7 +489,8 @@ static void __init setup_bootmem(void)
 		/* Reserve space for the bootmem bitmap... */
 		reserve_bootmem_node(NODE_DATA(node),
 				     PFN_PHYS(bootmap_pfn),
-				     bootmap_size);
+				     bootmap_size,
+				     BOOTMEM_DEFAULT);
 
 		/* ...and any other reserved regions. */
 		for (res = reserved; res; res = res->sibling) {
@@ -505,7 +506,8 @@ static void __init setup_bootmem(void)
 			    && res->end < PFN_PHYS(max_pfn))
 				reserve_bootmem_node(
 					NODE_DATA(node), res->start,
-					res->end - res->start + 1);
+					res->end - res->start + 1,
+					BOOTMEM_DEFAULT);
 		}
 
 		node_set_online(node);
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index 462cae893757..6e106b3d7729 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -406,7 +406,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	free_bootmem(memory_start, memory_end - memory_start);
 
-	reserve_bootmem(memory_start, bootmap_size);
+	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
 	/*
 	 * get kmalloc into gear
 	 */
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 65466c49d7a9..4da042e100a0 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -137,7 +137,7 @@ setup_arch(char **cmdline_p)
 	 * Arguments are start, size
          */
 
-        reserve_bootmem(PFN_PHYS(start_pfn), bootmap_size);
+	reserve_bootmem(PFN_PHYS(start_pfn), bootmap_size, BOOTMEM_DEFAULT);
 
 	/* paging_init() sets up the MMU and marks all pages as reserved */
 
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index b38ae1fc15fd..6c01464db699 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -925,13 +925,15 @@ static void __init setup_linux_memory(void)
 #endif
 
 	/* take back the memory occupied by the kernel image and the bootmem alloc map */
-	reserve_bootmem(kstart, kend - kstart + bootmap_size);
+	reserve_bootmem(kstart, kend - kstart + bootmap_size,
+			BOOTMEM_DEFAULT);
 
 	/* reserve the memory occupied by the initial ramdisk */
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= (low_top_pfn << PAGE_SHIFT)) {
-			reserve_bootmem(INITRD_START, INITRD_SIZE);
+			reserve_bootmem(INITRD_START, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET;
 			initrd_end = initrd_start + INITRD_SIZE;
 		}
@@ -986,9 +988,10 @@ static void __init setup_uclinux_memory(void)
 
 	/* now take back the bits the core kernel is occupying */
 #ifndef CONFIG_PROTECT_KERNEL
-	reserve_bootmem(kend, bootmap_size);
+	reserve_bootmem(kend, bootmap_size, BOOTMEM_DEFAULT);
 	reserve_bootmem((unsigned long) &__kernel_image_start,
-			kend - (unsigned long) &__kernel_image_start);
+			kend - (unsigned long) &__kernel_image_start,
+			BOOTMEM_DEFAULT);
 
 #else
 	dampr = __get_DAMPR(0);
@@ -996,14 +999,15 @@ static void __init setup_uclinux_memory(void)
 	dampr = (dampr >> 4) + 17;
 	dampr = 1 << dampr;
 
-	reserve_bootmem(__get_DAMPR(0) & xAMPRx_PPFN, dampr);
+	reserve_bootmem(__get_DAMPR(0) & xAMPRx_PPFN, dampr, BOOTMEM_DEFAULT);
 #endif
 
 	/* reserve some memory to do uncached DMA through if requested */
 #ifdef CONFIG_RESERVE_DMA_COHERENT
 	if (dma_coherent_mem_start)
 		reserve_bootmem(dma_coherent_mem_start,
-				dma_coherent_mem_end - dma_coherent_mem_start);
+				dma_coherent_mem_end - dma_coherent_mem_start,
+				BOOTMEM_DEFAULT);
 #endif
 
 } /* end setup_uclinux_memory() */
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index b2e86d0255e6..cd3734614d9d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -173,7 +173,7 @@ void __init setup_arch(char **cmdline_p)
 	 * the bootmem bitmap so we then reserve it after freeing it :-)
 	 */
 	free_bootmem(memory_start, memory_end - memory_start);
-	reserve_bootmem(memory_start, bootmap_size);
+	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
 	/*
 	 * get kmalloc into gear
 	 */
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 7e9c275ea148..344f64eca7a9 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -218,7 +218,7 @@ find_memory (void)
 
 	/* Free all available memory, then mark bootmem-map as being in use. */
 	efi_memmap_walk(filter_rsvd_memory, free_bootmem);
-	reserve_bootmem(bootmap_start, bootmap_size);
+	reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT);
 
 	find_initrd();
 
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 0b567398f38e..ee5e68b2af94 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -299,12 +299,12 @@ static void __init reserve_pernode_space(void)
 		pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
 		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 		base = __pa(bdp->node_bootmem_map);
-		reserve_bootmem_node(pdp, base, size);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
 
 		/* Now the per-node space */
 		size = mem_data[node].pernode_size;
 		base = __pa(mem_data[node].pernode_addr);
-		reserve_bootmem_node(pdp, base, size);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
 	}
 }
 
diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index d64814385d70..f1f5db0c4084 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -177,25 +177,28 @@ static unsigned long __init setup_memory(void)
 	 */
 	reserve_bootmem(CONFIG_MEMORY_START + PAGE_SIZE,
 		(PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE - 1)
-		- CONFIG_MEMORY_START);
+		- CONFIG_MEMORY_START,
+		BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(CONFIG_MEMORY_START, PAGE_SIZE);
+	reserve_bootmem(CONFIG_MEMORY_START, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve memory hole
 	 */
 #ifdef CONFIG_MEMHOLE
-	reserve_bootmem(CONFIG_MEMHOLE_START, CONFIG_MEMHOLE_SIZE);
+	reserve_bootmem(CONFIG_MEMHOLE_START, CONFIG_MEMHOLE_SIZE,
+			BOOTMEM_DEFAULT);
 #endif
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
-			reserve_bootmem(INITRD_START, INITRD_SIZE);
+			reserve_bootmem(INITRD_START, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET;
 			initrd_end = initrd_start + INITRD_SIZE;
 			printk("initrd:start[%08lx],size[%08lx]\n",
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index c7efdb0aefc5..07c1af7dc0e2 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -91,7 +91,8 @@ unsigned long __init setup_memory(void)
 			PFN_PHYS(mp->pages));
 
 		reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(mp->start_pfn),
-			PFN_PHYS(mp->free_pfn - mp->start_pfn) + bootmap_size);
+			PFN_PHYS(mp->free_pfn - mp->start_pfn) + bootmap_size,
+			BOOTMEM_DEFAULT);
 
 		if (max_low_pfn < max_pfn)
 			max_low_pfn = max_pfn;
@@ -104,7 +105,7 @@ unsigned long __init setup_memory(void)
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) {
 			reserve_bootmem_node(NODE_DATA(0), INITRD_START,
-				INITRD_SIZE);
+				INITRD_SIZE, BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET;
 			initrd_end = initrd_start + INITRD_SIZE;
 			printk("initrd:start[%08lx],size[%08lx]\n",
diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index 8dda6515887a..0055a6c06f75 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -154,7 +154,7 @@ void __init atari_stram_reserve_pages(void *start_mem)
 	/* always reserve first page of ST-RAM, the first 2 kB are
 	 * supervisor-only! */
 	if (!kernel_in_stram)
-		reserve_bootmem (0, PAGE_SIZE);
+		reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 }
 
diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c
index ed3a4caec620..9a06c48edcb3 100644
--- a/arch/m68k/kernel/setup.c
+++ b/arch/m68k/kernel/setup.c
@@ -323,7 +323,8 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (m68k_ramdisk.size) {
 		reserve_bootmem_node(__virt_to_node(phys_to_virt(m68k_ramdisk.addr)),
-				     m68k_ramdisk.addr, m68k_ramdisk.size);
+				     m68k_ramdisk.addr, m68k_ramdisk.size,
+				     BOOTMEM_DEFAULT);
 		initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
 		initrd_end = initrd_start + m68k_ramdisk.size;
 		printk("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index 81507c53d4a9..156c6c662c7e 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -203,7 +203,7 @@ void __init setup_arch(char **cmdline_p)
 	 * the bootmem bitmap so we then reserve it after freeing it :-)
 	 */
 	free_bootmem(memory_start, memory_end - memory_start);
-	reserve_bootmem(memory_start, bootmap_size);
+	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
 
 	/*
 	 * Get kmalloc into gear.
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index c032409cba9b..39f3dfe134fb 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -232,7 +232,7 @@ static void __init finalize_initrd(void)
 		goto disable;
 	}
 
-	reserve_bootmem(__pa(initrd_start), size);
+	reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT);
 	initrd_below_start_ok = 1;
 
 	printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
@@ -413,7 +413,7 @@ static void __init bootmem_init(void)
 	/*
 	 * Reserve the bootmap memory.
 	 */
-	reserve_bootmem(PFN_PHYS(mapstart), bootmap_size);
+	reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);
 
 	/*
 	 * Reserve initrd memory if needed.
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index e5e023f50a07..bf438d02366e 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -465,7 +465,8 @@ static void __init node_mem_init(cnodeid_t node)
 	free_bootmem_node(NODE_DATA(node), slot_firstpfn << PAGE_SHIFT,
 			(slot_lastpfn - slot_firstpfn) << PAGE_SHIFT);
 	reserve_bootmem_node(NODE_DATA(node), slot_firstpfn << PAGE_SHIFT,
-		((slot_freepfn - slot_firstpfn) << PAGE_SHIFT) + bootmap_size);
+		((slot_freepfn - slot_firstpfn) << PAGE_SHIFT) + bootmap_size,
+		BOOTMEM_DEFAULT);
 }
 
 /*
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index aa875fa43488..eb80f5e33d7d 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -315,11 +315,13 @@ static void __init setup_bootmem(void)
 #define PDC_CONSOLE_IO_IODC_SIZE 32768
 
 	reserve_bootmem_node(NODE_DATA(0), 0UL,
-			(unsigned long)(PAGE0->mem_free + PDC_CONSOLE_IO_IODC_SIZE));
+			(unsigned long)(PAGE0->mem_free +
+				PDC_CONSOLE_IO_IODC_SIZE), BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(0), __pa((unsigned long)_text),
-			(unsigned long)(_end - _text));
+			(unsigned long)(_end - _text), BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(0), (bootmap_start_pfn << PAGE_SHIFT),
-			((bootmap_pfn - bootmap_start_pfn) << PAGE_SHIFT));
+			((bootmap_pfn - bootmap_start_pfn) << PAGE_SHIFT),
+			BOOTMEM_DEFAULT);
 
 #ifndef CONFIG_DISCONTIGMEM
 
@@ -328,7 +330,8 @@ static void __init setup_bootmem(void)
 	for (i = 0; i < npmem_holes; i++) {
 		reserve_bootmem_node(NODE_DATA(0),
 				(pmem_holes[i].start_pfn << PAGE_SHIFT),
-				(pmem_holes[i].pages << PAGE_SHIFT));
+				(pmem_holes[i].pages << PAGE_SHIFT),
+				BOOTMEM_DEFAULT);
 	}
 #endif
 
@@ -346,7 +349,8 @@ static void __init setup_bootmem(void)
 			initrd_below_start_ok = 1;
 			printk(KERN_INFO "initrd: reserving %08lx-%08lx (mem_max %08lx)\n", __pa(initrd_start), __pa(initrd_start) + initrd_reserve, mem_max);
 
-			reserve_bootmem_node(NODE_DATA(0),__pa(initrd_start), initrd_reserve);
+			reserve_bootmem_node(NODE_DATA(0), __pa(initrd_start),
+					initrd_reserve, BOOTMEM_DEFAULT);
 		}
 	}
 #endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e8122447f019..ff5debf5eedd 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -220,12 +220,13 @@ void __init do_init_bootmem(void)
 				     lmb_size_bytes(&lmb.reserved, i) - 1;
 		if (addr < total_lowmem)
 			reserve_bootmem(lmb.reserved.region[i].base,
-					lmb_size_bytes(&lmb.reserved, i));
+					lmb_size_bytes(&lmb.reserved, i),
+					BOOTMEM_DEFAULT);
 		else if (lmb.reserved.region[i].base < total_lowmem) {
 			unsigned long adjusted_size = total_lowmem -
 				      lmb.reserved.region[i].base;
 			reserve_bootmem(lmb.reserved.region[i].base,
-					adjusted_size);
+					adjusted_size, BOOTMEM_DEFAULT);
 		}
 	}
 #else
@@ -234,7 +235,8 @@ void __init do_init_bootmem(void)
 	/* reserve the sections we're already using */
 	for (i = 0; i < lmb.reserved.cnt; i++)
 		reserve_bootmem(lmb.reserved.region[i].base,
-				lmb_size_bytes(&lmb.reserved, i));
+				lmb_size_bytes(&lmb.reserved, i),
+				BOOTMEM_DEFAULT);
 
 #endif
 	/* XXX need to clip this if using highmem? */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index c12adc3ddffd..bc60322d2436 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -675,7 +675,7 @@ void __init do_init_bootmem(void)
 				dbg("reserve_bootmem %lx %lx\n", physbase,
 				    size);
 				reserve_bootmem_node(NODE_DATA(nid), physbase,
-						     size);
+						     size, BOOTMEM_DEFAULT);
 			}
 		}
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 29ae165d1749..f9f8779022a0 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -649,21 +649,24 @@ setup_memory(void)
 	/*
 	 * Reserve memory used for lowcore/command line/kernel image.
 	 */
-	reserve_bootmem(0, (unsigned long)_ehead);
+	reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
 	reserve_bootmem((unsigned long)_stext,
-			PFN_PHYS(start_pfn) - (unsigned long)_stext);
+			PFN_PHYS(start_pfn) - (unsigned long)_stext,
+			BOOTMEM_DEFAULT);
 	/*
 	 * Reserve the bootmem bitmap itself as well. We do this in two
 	 * steps (first step was init_bootmem()) because this catches
 	 * the (very unlikely) case of us accidentally initializing the
 	 * bootmem allocator with an invalid RAM area.
 	 */
-	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
+	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
+			BOOTMEM_DEFAULT);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (INITRD_START && INITRD_SIZE) {
 		if (INITRD_START + INITRD_SIZE <= memory_end) {
-			reserve_bootmem(INITRD_START, INITRD_SIZE);
+			reserve_bootmem(INITRD_START, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START;
 			initrd_end = initrd_start + INITRD_SIZE;
 		} else {
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 855cdf9d85b1..af10db90a554 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -148,7 +148,8 @@ static void __init reserve_crashkernel(void)
 					(unsigned long)(free_mem >> 20));
 			crashk_res.start = crash_base;
 			crashk_res.end   = crash_base + crash_size - 1;
-			reserve_bootmem(crash_base, crash_size);
+			reserve_bootmem(crash_base, crash_size,
+					BOOTMEM_DEFAULT);
 		} else
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"you have to specify a base address\n");
@@ -184,13 +185,14 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	 * an invalid RAM area.
 	 */
 	reserve_bootmem(__MEMORY_START+PAGE_SIZE,
-		(PFN_PHYS(free_pfn)+bootmap_size+PAGE_SIZE-1)-__MEMORY_START);
+		(PFN_PHYS(free_pfn)+bootmap_size+PAGE_SIZE-1)-__MEMORY_START,
+		BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(__MEMORY_START, PAGE_SIZE);
+	reserve_bootmem(__MEMORY_START, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 	sparse_memory_present_with_active_regions(0);
 
@@ -200,7 +202,7 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
 			reserve_bootmem(INITRD_START + __MEMORY_START,
-					INITRD_SIZE);
+					INITRD_SIZE, BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET +
 					__MEMORY_START;
 			initrd_end = initrd_start + INITRD_SIZE;
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 8aff065dd307..2de7302724fc 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -80,9 +80,9 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 
 	/* Reserve the pgdat and bootmap space with the bootmem allocator */
 	reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
-			     sizeof(struct pglist_data));
+			     sizeof(struct pglist_data), BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(nid), free_pfn << PAGE_SHIFT,
-			     bootmap_pages << PAGE_SHIFT);
+			     bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
 
 	/* It's up */
 	node_set_online(nid);
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index a1bef07755a9..b89837accc88 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -259,7 +259,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 	if (initrd_start) {
 		/* Reserve the initrd image area. */
 		size = initrd_end - initrd_start;
-		reserve_bootmem(initrd_start, size);
+		reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
 		*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
 
 		initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
@@ -268,7 +268,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 #endif
 	/* Reserve the kernel text/data/bss. */
 	size = (start_pfn << PAGE_SHIFT) - phys_base;
-	reserve_bootmem(phys_base, size);
+	reserve_bootmem(phys_base, size, BOOTMEM_DEFAULT);
 	*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
 
 	/* Reserve the bootmem map.   We do not account for it
@@ -276,7 +276,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 	 * in free_all_bootmem.
 	 */
 	size = bootmap_size;
-	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
+	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
 	*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
 
 	return max_pfn;
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 523e993ee90c..e726c45645ff 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -997,7 +997,7 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail,
 		prom_printf("reserve_bootmem(initrd): base[%llx] size[%lx]\n",
 			initrd_start, initrd_end);
 #endif
-		reserve_bootmem(initrd_start, size);
+		reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
 
 		initrd_start += PAGE_OFFSET;
 		initrd_end += PAGE_OFFSET;
@@ -1007,7 +1007,7 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail,
 #ifdef CONFIG_DEBUG_BOOTMEM
 	prom_printf("reserve_bootmem(kernel): base[%lx] size[%lx]\n", kern_base, kern_size);
 #endif
-	reserve_bootmem(kern_base, kern_size);
+	reserve_bootmem(kern_base, kern_size, BOOTMEM_DEFAULT);
 	*pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT;
 
 	/* Add back in the initmem pages. */
@@ -1024,7 +1024,7 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail,
 	prom_printf("reserve_bootmem(bootmap): base[%lx] size[%lx]\n",
 		    (bootmap_pfn << PAGE_SHIFT), size);
 #endif
-	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
+	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
 
 	for (i = 0; i < pavail_ents; i++) {
 		unsigned long start_pfn, end_pfn;
@@ -1489,7 +1489,7 @@ static void __init taint_real_pages(void)
 					goto do_next_page;
 				}
 			}
-			reserve_bootmem(old_start, PAGE_SIZE);
+			reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 		do_next_page:
 			old_start += PAGE_SIZE;
diff --git a/arch/v850/kernel/anna.c b/arch/v850/kernel/anna.c
index 0e429041a117..5978a25170fb 100644
--- a/arch/v850/kernel/anna.c
+++ b/arch/v850/kernel/anna.c
@@ -85,7 +85,8 @@ void __init mach_reserve_bootmem ()
 	/* The space between SRAM and SDRAM is filled with duplicate
 	   images of SRAM.  Prevent the kernel from using them.  */
 	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE));
+			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
+			 BOOTMEM_DEFAULT);
 }
 
 void mach_gettimeofday (struct timespec *tv)
diff --git a/arch/v850/kernel/as85ep1.c b/arch/v850/kernel/as85ep1.c
index 18437bc5c3ad..b525ecf3aea4 100644
--- a/arch/v850/kernel/as85ep1.c
+++ b/arch/v850/kernel/as85ep1.c
@@ -116,7 +116,8 @@ void __init mach_reserve_bootmem ()
 	if (SDRAM_ADDR < RAM_END && SDRAM_ADDR > RAM_START)
 		/* We can't use the space between SRAM and SDRAM, so
 		   prevent the kernel from trying.  */
-		reserve_bootmem (SRAM_END, SDRAM_ADDR - SRAM_END);
+		reserve_bootmem(SRAM_END, SDRAM_ADDR - SRAM_END,
+				BOOTMEM_DEFAULT);
 }
 
 void mach_gettimeofday (struct timespec *tv)
diff --git a/arch/v850/kernel/rte_ma1_cb.c b/arch/v850/kernel/rte_ma1_cb.c
index 9a716f946421..08abf3d5f8df 100644
--- a/arch/v850/kernel/rte_ma1_cb.c
+++ b/arch/v850/kernel/rte_ma1_cb.c
@@ -46,13 +46,15 @@ void __init mach_reserve_bootmem ()
 {
 #ifdef CONFIG_RTE_CB_MULTI
 	/* Prevent the kernel from touching the monitor's scratch RAM.  */
-	reserve_bootmem (MON_SCRATCH_ADDR, MON_SCRATCH_SIZE);
+	reserve_bootmem(MON_SCRATCH_ADDR, MON_SCRATCH_SIZE,
+			BOOTMEM_DEFAULT);
 #endif
 
 	/* The space between SRAM and SDRAM is filled with duplicate
 	   images of SRAM.  Prevent the kernel from using them.  */
 	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE));
+			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
+			 BOOTMEM_DEFAULT);
 }
 
 void mach_gettimeofday (struct timespec *tv)
diff --git a/arch/v850/kernel/setup.c b/arch/v850/kernel/setup.c
index a914f244f494..a0a8456a8430 100644
--- a/arch/v850/kernel/setup.c
+++ b/arch/v850/kernel/setup.c
@@ -241,15 +241,18 @@ init_bootmem_alloc (unsigned long ram_start, unsigned long ram_len)
 	if (kram_end > kram_start)
 		/* Reserve the RAM part of the kernel's address space, so it
 		   doesn't get allocated.  */
-		reserve_bootmem (kram_start, kram_end - kram_start);
+		reserve_bootmem(kram_start, kram_end - kram_start,
+				BOOTMEM_DEFAULT);
 	
 	if (intv_in_ram && !intv_in_kram)
 		/* Reserve the interrupt vector space.  */
-		reserve_bootmem (intv_start, intv_end - intv_start);
+		reserve_bootmem(intv_start, intv_end - intv_start,
+				BOOTMEM_DEFAULT);
 
 	if (bootmap >= ram_start && bootmap < ram_end)
 		/* Reserve the bootmap space.  */
-		reserve_bootmem (bootmap, bootmap_len);
+		reserve_bootmem(bootmap, bootmap_len,
+				BOOTMEM_DEFAULT);
 
 	/* Reserve the memory used by the root filesystem image if it's
 	   in RAM.  */
@@ -257,7 +260,8 @@ init_bootmem_alloc (unsigned long ram_start, unsigned long ram_len)
 	    && (unsigned long)&_root_fs_image_start >= ram_start
 	    && (unsigned long)&_root_fs_image_start < ram_end)
 		reserve_bootmem ((unsigned long)&_root_fs_image_start,
-				 &_root_fs_image_end - &_root_fs_image_start);
+				 &_root_fs_image_end - &_root_fs_image_start,
+				 BOOTMEM_DEFAULT);
 
 	/* Let the platform-dependent code reserve some too.  */
 	if (mrb)
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 67009cdd5eca..f349e68e45a0 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -736,7 +736,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
 			smp_found_config = 1;
 			printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
 				mpf, virt_to_phys(mpf));
-			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
+					BOOTMEM_DEFAULT);
 			if (mpf->mpf_physptr) {
 				/*
 				 * We cannot access to MPC table to compute
@@ -751,7 +752,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
 				unsigned long end = max_low_pfn * PAGE_SIZE;
 				if (mpf->mpf_physptr + size > end)
 					size = end - mpf->mpf_physptr;
-				reserve_bootmem(mpf->mpf_physptr, size);
+				reserve_bootmem(mpf->mpf_physptr, size,
+						BOOTMEM_DEFAULT);
 			}
 
 			mpf_found = mpf;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 62adc5f20be5..d1d8c347cc0b 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -390,7 +390,7 @@ static void __init reserve_ebda_region(void)
 	unsigned int addr;
 	addr = get_bios_ebda();
 	if (addr)
-		reserve_bootmem(addr, PAGE_SIZE);
+		reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -484,7 +484,8 @@ static void __init reserve_crashkernel(void)
 					(unsigned long)(total_mem >> 20));
 			crashk_res.start = crash_base;
 			crashk_res.end   = crash_base + crash_size - 1;
-			reserve_bootmem(crash_base, crash_size);
+			reserve_bootmem(crash_base, crash_size,
+					BOOTMEM_DEFAULT);
 		} else
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"you have to specify a base address\n");
@@ -525,7 +526,7 @@ static void __init reserve_initrd(void)
 	}
 	if (ramdisk_end <= end_of_lowmem) {
 		/* All in lowmem, easy case */
-		reserve_bootmem(ramdisk_image, ramdisk_size);
+		reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
 		initrd_start = ramdisk_image + PAGE_OFFSET;
 		initrd_end = initrd_start+ramdisk_size;
 		return;
@@ -536,7 +537,7 @@ static void __init reserve_initrd(void)
 
 	/* Note: this includes all the lowmem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	reserve_bootmem(ramdisk_here, ramdisk_size);
+	reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 
@@ -606,13 +607,14 @@ void __init setup_bootmem_allocator(void)
 	 * bootmem allocator with an invalid RAM area.
 	 */
 	reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
-			 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
+			 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
+			 BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(0, PAGE_SIZE);
+	reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 	/* reserve EBDA region, it's a 4K region */
 	reserve_ebda_region();
@@ -622,7 +624,7 @@ void __init setup_bootmem_allocator(void)
        unless you have no PS/2 mouse plugged in. */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 	    boot_cpu_data.x86 == 6)
-	     reserve_bootmem(0xa0000 - 4096, 4096);
+	     reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
 
 #ifdef CONFIG_SMP
 	/*
@@ -630,7 +632,7 @@ void __init setup_bootmem_allocator(void)
 	 * FIXME: Don't need the extra page at 4K, but need to fix
 	 * trampoline before removing it. (see the GDT stuff)
 	 */
-	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+	reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
 #endif
 #ifdef CONFIG_ACPI_SLEEP
 	/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c8939dfddfba..8345c3b12f05 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -189,7 +189,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	free_bootmem_with_active_regions(0, end_pfn);
-	reserve_bootmem(bootmap, bootmap_size);
+	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
 
@@ -238,7 +238,8 @@ static void __init reserve_crashkernel(void)
 					(unsigned long)(free_mem >> 20));
 			crashk_res.start = crash_base;
 			crashk_res.end   = crash_base + crash_size - 1;
-			reserve_bootmem(crash_base, crash_size);
+			reserve_bootmem(crash_base, crash_size,
+					BOOTMEM_DEFAULT);
 		} else
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"you have to specify a base address\n");
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 04b1d20e2613..c394ca0720b8 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -391,7 +391,8 @@ unsigned long __init setup_memory(void)
 void __init numa_kva_reserve(void)
 {
 	if (kva_pages)
-		reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages));
+		reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
+				BOOTMEM_DEFAULT);
 }
 
 void __init zone_sizes_init(void)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9b61c75a2355..5fe880fc305d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -644,9 +644,9 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
 	/* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-	reserve_bootmem_node(NODE_DATA(nid), phys, len);
+	reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
 #else
-	reserve_bootmem(phys, len);
+	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
 	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 5a02bf4c91ec..1aecc658cd7d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -238,9 +238,10 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
 	free_bootmem_with_active_regions(nodeid, end);
 
-	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
+	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
+			BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-			     bootmap_pages<<PAGE_SHIFT);
+			bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 #ifdef CONFIG_ACPI_NUMA
 	srat_reserve_add_area(nodeid);
 #endif
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 65416f843e59..ecd91ea8a8ae 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -488,7 +488,8 @@ void __init srat_reserve_add_area(int nodeid)
 		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
 				"pre-allocated memory.\n", (unsigned long long)total_mb);
 		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start);
+			       nodes_add[nodeid].end - nodes_add[nodeid].start,
+			       BOOTMEM_DEFAULT);
 	}
 }
 
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index 5d6f4ce6e6d6..274a59566c45 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -107,8 +107,8 @@ static inline int pfn_valid(int pfn)
 /*
  * Following are macros that are specific to this numa platform.
  */
-#define reserve_bootmem(addr, size) \
-	reserve_bootmem_node(NODE_DATA(0), (addr), (size))
+#define reserve_bootmem(addr, size, flags) \
+	reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags))
 #define alloc_bootmem(x) \
 	__alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 0365ec9fc0c9..4e4e340592fb 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -60,8 +60,20 @@ extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
 				  unsigned long goal,
 				  unsigned long limit);
 
+/*
+ * flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
+ * the architecture-specific code should honor this)
+ */
+#define BOOTMEM_DEFAULT		0
+#define BOOTMEM_EXCLUSIVE	(1<<0)
+
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void reserve_bootmem(unsigned long addr, unsigned long size);
+/*
+ * If flags is 0, then the return value is always 0 (success). If
+ * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
+ * memory already was reserved.
+ */
+extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
@@ -84,7 +96,8 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long endpfn);
 extern void reserve_bootmem_node(pg_data_t *pgdat,
 				 unsigned long physaddr,
-				 unsigned long size);
+				 unsigned long size,
+				 int flags);
 extern void free_bootmem_node(pg_data_t *pgdat,
 			      unsigned long addr,
 			      unsigned long size);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 00a96970b237..f6ff4337b424 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -111,11 +111,12 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
  * might be used for boot-time allocations - or it might get added
  * to the free page pool later on.
  */
-static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
-					unsigned long size)
+static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+			unsigned long addr, unsigned long size, int flags)
 {
 	unsigned long sidx, eidx;
 	unsigned long i;
+	int ret;
 
 	/*
 	 * round up, partially reserved pages are considered
@@ -133,7 +134,20 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add
 #ifdef CONFIG_DEBUG_BOOTMEM
 			printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
 #endif
+			if (flags & BOOTMEM_EXCLUSIVE) {
+				ret = -EBUSY;
+				goto err;
+			}
 		}
+
+	return 0;
+
+err:
+	/* unreserve memory we accidentally reserved */
+	for (i--; i >= sidx; i--)
+		clear_bit(i, bdata->node_bootmem_map);
+
+	return ret;
 }
 
 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -374,9 +388,9 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 }
 
 void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-				 unsigned long size)
+				 unsigned long size, int flags)
 {
-	reserve_bootmem_core(pgdat->bdata, physaddr, size);
+	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 }
 
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
@@ -398,9 +412,10 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 }
 
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-void __init reserve_bootmem(unsigned long addr, unsigned long size)
+int __init reserve_bootmem(unsigned long addr, unsigned long size,
+			    int flags)
 {
-	reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
+	return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
-- 
cgit 


From c76f860c44357f560a763d2894e95464cab7b159 Mon Sep 17 00:00:00 2001
From: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Date: Thu, 7 Feb 2008 00:15:20 -0800
Subject: vmcoreinfo: rename vmcoreinfo's macros returning the size

This patchset is for the vmcoreinfo data.

The vmcoreinfo data has the minimum debugging information only for dump
filtering.  makedumpfile (dump filtering command) gets it to distinguish
unnecessary pages, and makedumpfile creates a small dumpfile.

This patch:

VMCOREINFO_SIZE() should be renamed VMCOREINFO_STRUCT_SIZE() since it's always
returning the size of the struct with a given name. This change would allow
VMCOREINFO_TYPEDEF_SIZE() to simply become VMCOREINFO_SIZE() since it need not
be used exclusively for typedefs.

This discussion is the following:
http://www.ussg.iu.edu/hypermail/linux/kernel/0709.3/0582.html

Signed-off-by: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/machine_kexec.c |  2 +-
 include/linux/kexec.h            |  6 +++---
 kernel/kexec.c                   | 14 +++++++-------
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index d6cd45f4c6c7..6e725eff271b 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -135,7 +135,7 @@ void arch_crash_save_vmcoreinfo(void)
 
 	VMCOREINFO_SYMBOL(node_memblk);
 	VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS);
-	VMCOREINFO_SIZE(node_memblk_s);
+	VMCOREINFO_STRUCT_SIZE(node_memblk_s);
 	VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
 	VMCOREINFO_OFFSET(node_memblk_s, size);
 #endif
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 2d9c448d8c52..39112a0e4693 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -130,11 +130,11 @@ unsigned long paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_SYMBOL(name) \
 	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
 #define VMCOREINFO_SIZE(name) \
-	vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
-			      (unsigned long)sizeof(struct name))
-#define VMCOREINFO_TYPEDEF_SIZE(name) \
 	vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
 			      (unsigned long)sizeof(name))
+#define VMCOREINFO_STRUCT_SIZE(name) \
+	vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+			      (unsigned long)sizeof(struct name))
 #define VMCOREINFO_OFFSET(name, field) \
 	vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
 			      (unsigned long)&(((struct name *)0)->field))
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 9a26eec9eb04..8eb4df1cabcc 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1376,15 +1376,15 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_SPARSEMEM
 	VMCOREINFO_SYMBOL(mem_section);
 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
-	VMCOREINFO_SIZE(mem_section);
+	VMCOREINFO_STRUCT_SIZE(mem_section);
 	VMCOREINFO_OFFSET(mem_section, section_mem_map);
 #endif
-	VMCOREINFO_SIZE(page);
-	VMCOREINFO_SIZE(pglist_data);
-	VMCOREINFO_SIZE(zone);
-	VMCOREINFO_SIZE(free_area);
-	VMCOREINFO_SIZE(list_head);
-	VMCOREINFO_TYPEDEF_SIZE(nodemask_t);
+	VMCOREINFO_STRUCT_SIZE(page);
+	VMCOREINFO_STRUCT_SIZE(pglist_data);
+	VMCOREINFO_STRUCT_SIZE(zone);
+	VMCOREINFO_STRUCT_SIZE(free_area);
+	VMCOREINFO_STRUCT_SIZE(list_head);
+	VMCOREINFO_SIZE(nodemask_t);
 	VMCOREINFO_OFFSET(page, flags);
 	VMCOREINFO_OFFSET(page, _count);
 	VMCOREINFO_OFFSET(page, mapping);
-- 
cgit 


From 1e4f2955433231b4b02dc4a9eb5d4d403a8680e1 Mon Sep 17 00:00:00 2001
From: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Date: Thu, 7 Feb 2008 00:15:22 -0800
Subject: vmcoreinfo: use the existing offsetof() for VMCOREINFO_OFFSET()

It is better that the existing offsetof() is used for VMCOREINFO_OFFSET().

This discussion is the following:
http://www.ussg.iu.edu/hypermail/linux/kernel/0709.3/0584.html

Signed-off-by: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Acked-by: Simon Horman <horms@verge.net.au>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 39112a0e4693..cbc3cd7e11c1 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -137,7 +137,7 @@ unsigned long paddr_vmcoreinfo_note(void);
 			      (unsigned long)sizeof(struct name))
 #define VMCOREINFO_OFFSET(name, field) \
 	vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
-			      (unsigned long)&(((struct name *)0)->field))
+			      (unsigned long)offsetof(struct name, field))
 #define VMCOREINFO_LENGTH(name, value) \
 	vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
 #define VMCOREINFO_NUMBER(name) \
-- 
cgit 


From bba1f603b88f30945ae4c5eccf2a6f5a12b877c5 Mon Sep 17 00:00:00 2001
From: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Date: Thu, 7 Feb 2008 00:15:22 -0800
Subject: vmcoreinfo: add "VMCOREINFO_" to all the call for
 vmcoreinfo_append_str()

For readability, all the calls to vmcoreinfo_append_str() are changed to macros
having a prefix "VMCOREINFO_".

This discussion is the following:
http://www.ussg.iu.edu/hypermail/linux/kernel/0709.3/0584.html

Signed-off-by: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Acked-by: Simon Horman <horms@verge.net.au>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h | 4 ++++
 kernel/kexec.c        | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index cbc3cd7e11c1..3265968cd2cd 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -127,6 +127,10 @@ void vmcoreinfo_append_str(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
 unsigned long paddr_vmcoreinfo_note(void);
 
+#define VMCOREINFO_OSRELEASE(name) \
+	vmcoreinfo_append_str("OSRELEASE=%s\n", #name)
+#define VMCOREINFO_PAGESIZE(value) \
+	vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
 #define VMCOREINFO_SYMBOL(name) \
 	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
 #define VMCOREINFO_SIZE(name) \
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 8eb4df1cabcc..06a0e2775651 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1361,8 +1361,8 @@ unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
-	vmcoreinfo_append_str("OSRELEASE=%s\n", init_uts_ns.name.release);
-	vmcoreinfo_append_str("PAGESIZE=%ld\n", PAGE_SIZE);
+	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
+	VMCOREINFO_PAGESIZE(PAGE_SIZE);
 
 	VMCOREINFO_SYMBOL(init_uts_ns);
 	VMCOREINFO_SYMBOL(node_online_map);
-- 
cgit 


From d1bc8e95445224276d7896b8b08cbb0b28a0ca80 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Feb 2008 00:15:26 -0800
Subject: Add an ERR_CAST() function to complement ERR_PTR and co.

Add an ERR_CAST() function to complement ERR_PTR and co.  for the purposes
of casting an error entyped as one pointer type to an error of another
pointer type whilst making it explicit as to what is going on.

This provides a replacement for the ERR_PTR(PTR_ERR(p)) construct.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/err.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/err.h b/include/linux/err.h
index 1ab1d44f8d3b..ec87f3142bf3 100644
--- a/include/linux/err.h
+++ b/include/linux/err.h
@@ -34,6 +34,19 @@ static inline long IS_ERR(const void *ptr)
 	return IS_ERR_VALUE((unsigned long)ptr);
 }
 
+/**
+ * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type
+ * @ptr: The pointer to cast.
+ *
+ * Explicitly cast an error-valued pointer to another pointer type in such a
+ * way as to make it clear that's what's going on.
+ */
+static inline void *ERR_CAST(const void *ptr)
+{
+	/* cast away the const */
+	return (void *) ptr;
+}
+
 #endif
 
 #endif /* _LINUX_ERR_H */
-- 
cgit 


From b46980feed937868d3333514028bfbe9a651e4ca Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Feb 2008 00:15:27 -0800
Subject: iget: introduce a function to register iget failure

Introduce a function to register failure in an inode construction path.  This
includes marking the inode under construction as bad, unlocking it and
releasing it.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/porting | 18 +++++++++++++-----
 fs/bad_inode.c                    | 14 ++++++++++++++
 include/linux/fs.h                |  1 +
 3 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 0f33c77bc14b..fbd3815a5f57 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -184,11 +184,19 @@ just takes the superblock and inode number as arguments and does the
 test and set for you.
 
 e.g.
-       inode = iget_locked(sb, ino);
-       if (inode->i_state & I_NEW) {
-               read_inode_from_disk(inode);
-               unlock_new_inode(inode);
-       }
+	inode = iget_locked(sb, ino);
+	if (inode->i_state & I_NEW) {
+		err = read_inode_from_disk(inode);
+		if (err < 0) {
+			iget_failed(inode);
+			return err;
+		}
+		unlock_new_inode(inode);
+	}
+
+Note that if the process of setting up a new inode fails, then iget_failed()
+should be called on the inode to render it dead, and an appropriate error
+should be passed back to the caller.
 
 ---
 [recommended]
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 521ff7caadbd..f1c2ea8342f5 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -359,3 +359,17 @@ int is_bad_inode(struct inode *inode)
 }
 
 EXPORT_SYMBOL(is_bad_inode);
+
+/**
+ * iget_failed - Mark an under-construction inode as dead and release it
+ * @inode: The inode to discard
+ *
+ * Mark an under-construction inode as dead and release it.
+ */
+void iget_failed(struct inode *inode)
+{
+	make_bad_inode(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+}
+EXPORT_SYMBOL(iget_failed);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2925f7011ece..d202600d36bd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1780,6 +1780,7 @@ static inline struct inode *iget(struct super_block *sb, unsigned long ino)
 }
 
 extern void __iget(struct inode * inode);
+extern void iget_failed(struct inode *);
 extern void clear_inode(struct inode *);
 extern void destroy_inode(struct inode *);
 extern struct inode *new_inode(struct super_block *);
-- 
cgit 


From 298384cd7929a3a14d7b116095973f0d02f5d09e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Feb 2008 00:15:34 -0800
Subject: iget: stop EFS from using iget() and read_inode()

Stop the EFS filesystem from using iget() and read_inode().  Replace
efs_read_inode() with efs_iget(), and call that instead of iget().  efs_iget()
then uses iget_locked() directly and returns a proper error code instead of an
inode in the event of an error.

efs_fill_super() returns any error incurred when getting the root inode
instead of EACCES.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/efs/inode.c         | 25 +++++++++++++++++--------
 fs/efs/namei.c         | 23 ++++++++++++-----------
 fs/efs/super.c         | 16 +++++++++++-----
 include/linux/efs_fs.h |  2 +-
 4 files changed, 41 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 174696f9bf14..627c3026946d 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -45,17 +45,26 @@ static inline void extent_copy(efs_extent *src, efs_extent *dst) {
 	return;
 }
 
-void efs_read_inode(struct inode *inode)
+struct inode *efs_iget(struct super_block *super, unsigned long ino)
 {
 	int i, inode_index;
 	dev_t device;
 	u32 rdev;
 	struct buffer_head *bh;
-	struct efs_sb_info    *sb = SUPER_INFO(inode->i_sb);
-	struct efs_inode_info *in = INODE_INFO(inode);
+	struct efs_sb_info    *sb = SUPER_INFO(super);
+	struct efs_inode_info *in;
 	efs_block_t block, offset;
 	struct efs_dinode *efs_inode;
-  
+	struct inode *inode;
+
+	inode = iget_locked(super, ino);
+	if (IS_ERR(inode))
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	in = INODE_INFO(inode);
+
 	/*
 	** EFS layout:
 	**
@@ -159,13 +168,13 @@ void efs_read_inode(struct inode *inode)
 			break;
 	}
 
-	return;
+	unlock_new_inode(inode);
+	return inode;
         
 read_inode_error:
 	printk(KERN_WARNING "EFS: failed to read inode %lu\n", inode->i_ino);
-	make_bad_inode(inode);
-
-	return;
+	iget_failed(inode);
+	return ERR_PTR(-EIO);
 }
 
 static inline efs_block_t
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index f7f407075be1..e26704742d41 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -66,9 +66,10 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
 	lock_kernel();
 	inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
 	if (inodenum) {
-		if (!(inode = iget(dir->i_sb, inodenum))) {
+		inode = efs_iget(dir->i_sb, inodenum);
+		if (IS_ERR(inode)) {
 			unlock_kernel();
-			return ERR_PTR(-EACCES);
+			return ERR_CAST(inode);
 		}
 	}
 	unlock_kernel();
@@ -84,12 +85,11 @@ static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino,
 
 	if (ino == 0)
 		return ERR_PTR(-ESTALE);
-	inode = iget(sb, ino);
-	if (inode == NULL)
-		return ERR_PTR(-ENOMEM);
+	inode = efs_iget(sb, ino);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
 
-	if (is_bad_inode(inode) ||
-	    (generation && inode->i_generation != generation)) {
+	if (generation && inode->i_generation != generation) {
 		iput(inode);
 		return ERR_PTR(-ESTALE);
 	}
@@ -116,7 +116,7 @@ struct dentry *efs_get_parent(struct dentry *child)
 	struct dentry *parent;
 	struct inode *inode;
 	efs_ino_t ino;
-	int error;
+	long error;
 
 	lock_kernel();
 
@@ -125,10 +125,11 @@ struct dentry *efs_get_parent(struct dentry *child)
 	if (!ino)
 		goto fail;
 
-	error = -EACCES;
-	inode = iget(child->d_inode->i_sb, ino);
-	if (!inode)
+	inode = efs_iget(child->d_inode->i_sb, ino);
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
 		goto fail;
+	}
 
 	error = -ENOMEM;
 	parent = d_alloc_anon(inode);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index c79bc627f107..14082405cdd1 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -107,7 +107,6 @@ static int efs_remount(struct super_block *sb, int *flags, char *data)
 static const struct super_operations efs_superblock_operations = {
 	.alloc_inode	= efs_alloc_inode,
 	.destroy_inode	= efs_destroy_inode,
-	.read_inode	= efs_read_inode,
 	.put_super	= efs_put_super,
 	.statfs		= efs_statfs,
 	.remount_fs	= efs_remount,
@@ -247,6 +246,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
 	struct efs_sb_info *sb;
 	struct buffer_head *bh;
 	struct inode *root;
+	int ret = -EINVAL;
 
  	sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL);
 	if (!sb)
@@ -303,12 +303,18 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
 	}
 	s->s_op   = &efs_superblock_operations;
 	s->s_export_op = &efs_export_ops;
-	root = iget(s, EFS_ROOTINODE);
+	root = efs_iget(s, EFS_ROOTINODE);
+	if (IS_ERR(root)) {
+		printk(KERN_ERR "EFS: get root inode failed\n");
+		ret = PTR_ERR(root);
+		goto out_no_fs;
+	}
+
 	s->s_root = d_alloc_root(root);
- 
 	if (!(s->s_root)) {
-		printk(KERN_ERR "EFS: get root inode failed\n");
+		printk(KERN_ERR "EFS: get root dentry failed\n");
 		iput(root);
+		ret = -ENOMEM;
 		goto out_no_fs;
 	}
 
@@ -318,7 +324,7 @@ out_no_fs_ul:
 out_no_fs:
 	s->s_fs_info = NULL;
 	kfree(sb);
-	return -EINVAL;
+	return ret;
 }
 
 static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) {
diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h
index dd57fe523e97..a695d63a07af 100644
--- a/include/linux/efs_fs.h
+++ b/include/linux/efs_fs.h
@@ -41,7 +41,7 @@ extern const struct inode_operations efs_dir_inode_operations;
 extern const struct file_operations efs_dir_operations;
 extern const struct address_space_operations efs_symlink_aops;
 
-extern void efs_read_inode(struct inode *);
+extern struct inode *efs_iget(struct super_block *, unsigned long);
 extern efs_block_t efs_map_block(struct inode *, efs_block_t);
 extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
-- 
cgit 


From 473043dcee1874aab99f66b0362b344618eb3790 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Feb 2008 00:15:36 -0800
Subject: iget: stop EXT3 from using iget() and read_inode()

Stop the EXT3 filesystem from using iget() and read_inode().  Replace
ext3_read_inode() with ext3_iget(), and call that instead of iget().
ext3_iget() then uses iget_locked() directly and returns a proper error code
instead of an inode in the event of an error.

ext3_fill_super() returns any error incurred when getting the root inode
instead of EINVAL.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Jan Kara <jack@suse.cz>
Cc: <linux-ext4@vger.kernel.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/ialloc.c        | 58 +++++++++++++++++++++++++++++--------------------
 fs/ext3/inode.c         | 25 ++++++++++++++++-----
 fs/ext3/namei.c         | 29 ++++++++-----------------
 fs/ext3/resize.c        |  7 +++---
 fs/ext3/super.c         | 40 +++++++++++++++++++---------------
 include/linux/ext3_fs.h |  2 +-
 6 files changed, 89 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 1bc8cd89c51d..58ae2f943f12 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -642,14 +642,15 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
 	unsigned long block_group;
 	int bit;
-	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bitmap_bh;
 	struct inode *inode = NULL;
+	long err = -EIO;
 
 	/* Error cases - e2fsck has already cleaned up for us */
 	if (ino > max_ino) {
 		ext3_warning(sb, __FUNCTION__,
 			     "bad orphan ino %lu!  e2fsck was run?", ino);
-		goto out;
+		goto error;
 	}
 
 	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
@@ -658,38 +659,49 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	if (!bitmap_bh) {
 		ext3_warning(sb, __FUNCTION__,
 			     "inode bitmap error for orphan %lu", ino);
-		goto out;
+		goto error;
 	}
 
 	/* Having the inode bit set should be a 100% indicator that this
 	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
 	 * inodes that were being truncated, so we can't check i_nlink==0.
 	 */
-	if (!ext3_test_bit(bit, bitmap_bh->b_data) ||
-			!(inode = iget(sb, ino)) || is_bad_inode(inode) ||
-			NEXT_ORPHAN(inode) > max_ino) {
-		ext3_warning(sb, __FUNCTION__,
-			     "bad orphan inode %lu!  e2fsck was run?", ino);
-		printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
-		       bit, (unsigned long long)bitmap_bh->b_blocknr,
-		       ext3_test_bit(bit, bitmap_bh->b_data));
-		printk(KERN_NOTICE "inode=%p\n", inode);
-		if (inode) {
-			printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
-			       is_bad_inode(inode));
-			printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
-			       NEXT_ORPHAN(inode));
-			printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
-		}
+	if (!ext3_test_bit(bit, bitmap_bh->b_data))
+		goto bad_orphan;
+
+	inode = ext3_iget(sb, ino);
+	if (IS_ERR(inode))
+		goto iget_failed;
+
+	if (NEXT_ORPHAN(inode) > max_ino)
+		goto bad_orphan;
+	brelse(bitmap_bh);
+	return inode;
+
+iget_failed:
+	err = PTR_ERR(inode);
+	inode = NULL;
+bad_orphan:
+	ext3_warning(sb, __FUNCTION__,
+		     "bad orphan inode %lu!  e2fsck was run?", ino);
+	printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
+	       bit, (unsigned long long)bitmap_bh->b_blocknr,
+	       ext3_test_bit(bit, bitmap_bh->b_data));
+	printk(KERN_NOTICE "inode=%p\n", inode);
+	if (inode) {
+		printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
+		       is_bad_inode(inode));
+		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
+		       NEXT_ORPHAN(inode));
+		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
 		/* Avoid freeing blocks if we got a bad deleted inode */
-		if (inode && inode->i_nlink == 0)
+		if (inode->i_nlink == 0)
 			inode->i_blocks = 0;
 		iput(inode);
-		inode = NULL;
 	}
-out:
 	brelse(bitmap_bh);
-	return inode;
+error:
+	return ERR_PTR(err);
 }
 
 unsigned long ext3_count_free_inodes (struct super_block * sb)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 8a9ce2d09bde..eb95670a27eb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2654,21 +2654,31 @@ void ext3_get_inode_flags(struct ext3_inode_info *ei)
 		ei->i_flags |= EXT3_DIRSYNC_FL;
 }
 
-void ext3_read_inode(struct inode * inode)
+struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 {
 	struct ext3_iloc iloc;
 	struct ext3_inode *raw_inode;
-	struct ext3_inode_info *ei = EXT3_I(inode);
+	struct ext3_inode_info *ei;
 	struct buffer_head *bh;
+	struct inode *inode;
+	long ret;
 	int block;
 
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	ei = EXT3_I(inode);
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 	ei->i_acl = EXT3_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
 #endif
 	ei->i_block_alloc_info = NULL;
 
-	if (__ext3_get_inode_loc(inode, &iloc, 0))
+	ret = __ext3_get_inode_loc(inode, &iloc, 0);
+	if (ret < 0)
 		goto bad_inode;
 	bh = iloc.bh;
 	raw_inode = ext3_raw_inode(&iloc);
@@ -2699,6 +2709,7 @@ void ext3_read_inode(struct inode * inode)
 		    !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
 			/* this inode is deleted */
 			brelse (bh);
+			ret = -ESTALE;
 			goto bad_inode;
 		}
 		/* The only unlinked inodes we let through here have
@@ -2742,6 +2753,7 @@ void ext3_read_inode(struct inode * inode)
 		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
 		    EXT3_INODE_SIZE(inode->i_sb)) {
 			brelse (bh);
+			ret = -EIO;
 			goto bad_inode;
 		}
 		if (ei->i_extra_isize == 0) {
@@ -2783,11 +2795,12 @@ void ext3_read_inode(struct inode * inode)
 	}
 	brelse (iloc.bh);
 	ext3_set_inode_flags(inode);
-	return;
+	unlock_new_inode(inode);
+	return inode;
 
 bad_inode:
-	make_bad_inode(inode);
-	return;
+	iget_failed(inode);
+	return ERR_PTR(ret);
 }
 
 /*
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 92b83b004dd8..dec3e0d88ab1 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1037,17 +1037,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 		if (!ext3_valid_inum(dir->i_sb, ino)) {
 			ext3_error(dir->i_sb, "ext3_lookup",
 				   "bad inode number: %lu", ino);
-			inode = NULL;
-		} else
-			inode = iget(dir->i_sb, ino);
-
-		if (!inode)
-			return ERR_PTR(-EACCES);
-
-		if (is_bad_inode(inode)) {
-			iput(inode);
-			return ERR_PTR(-ENOENT);
+			return ERR_PTR(-EIO);
 		}
+		inode = ext3_iget(dir->i_sb, ino);
+		if (IS_ERR(inode))
+			return ERR_CAST(inode);
 	}
 	return d_splice_alias(inode, dentry);
 }
@@ -1076,18 +1070,13 @@ struct dentry *ext3_get_parent(struct dentry *child)
 	if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
 		ext3_error(child->d_inode->i_sb, "ext3_get_parent",
 			   "bad inode number: %lu", ino);
-		inode = NULL;
-	} else
-		inode = iget(child->d_inode->i_sb, ino);
-
-	if (!inode)
-		return ERR_PTR(-EACCES);
-
-	if (is_bad_inode(inode)) {
-		iput(inode);
-		return ERR_PTR(-ENOENT);
+		return ERR_PTR(-EIO);
 	}
 
+	inode = ext3_iget(child->d_inode->i_sb, ino);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
 	parent = d_alloc_anon(inode);
 	if (!parent) {
 		iput(inode);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 44de1453c301..ebc05af7343a 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -795,12 +795,11 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
 		}
-		inode = iget(sb, EXT3_RESIZE_INO);
-		if (!inode || is_bad_inode(inode)) {
+		inode = ext3_iget(sb, EXT3_RESIZE_INO);
+		if (IS_ERR(inode)) {
 			ext3_warning(sb, __FUNCTION__,
 				     "Error opening resize inode");
-			iput(inode);
-			return -ENOENT;
+			return PTR_ERR(inode);
 		}
 	}
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 343677e8c350..cf2a2c3660ec 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -649,11 +649,10 @@ static struct inode *ext3_nfs_get_inode(struct super_block *sb,
 	 * Currently we don't know the generation for parent directory, so
 	 * a generation of 0 means "accept any"
 	 */
-	inode = iget(sb, ino);
-	if (inode == NULL)
-		return ERR_PTR(-ENOMEM);
-	if (is_bad_inode(inode) ||
-	    (generation && inode->i_generation != generation)) {
+	inode = ext3_iget(sb, ino);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	if (generation && inode->i_generation != generation) {
 		iput(inode);
 		return ERR_PTR(-ESTALE);
 	}
@@ -722,7 +721,6 @@ static struct quotactl_ops ext3_qctl_operations = {
 static const struct super_operations ext3_sops = {
 	.alloc_inode	= ext3_alloc_inode,
 	.destroy_inode	= ext3_destroy_inode,
-	.read_inode	= ext3_read_inode,
 	.write_inode	= ext3_write_inode,
 	.dirty_inode	= ext3_dirty_inode,
 	.delete_inode	= ext3_delete_inode,
@@ -1378,8 +1376,8 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 	while (es->s_last_orphan) {
 		struct inode *inode;
 
-		if (!(inode =
-		      ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
+		inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
+		if (IS_ERR(inode)) {
 			es->s_last_orphan = 0;
 			break;
 		}
@@ -1508,6 +1506,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	int db_count;
 	int i;
 	int needs_recovery;
+	int ret = -EINVAL;
 	__le32 features;
 	int err;
 
@@ -1877,19 +1876,24 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 * so we can safely mount the rest of the filesystem now.
 	 */
 
-	root = iget(sb, EXT3_ROOT_INO);
-	sb->s_root = d_alloc_root(root);
-	if (!sb->s_root) {
+	root = ext3_iget(sb, EXT3_ROOT_INO);
+	if (IS_ERR(root)) {
 		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
-		iput(root);
+		ret = PTR_ERR(root);
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-		dput(sb->s_root);
-		sb->s_root = NULL;
+		iput(root);
 		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
 		goto failed_mount4;
 	}
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		printk(KERN_ERR "EXT3-fs: get root dentry failed\n");
+		iput(root);
+		ret = -ENOMEM;
+		goto failed_mount4;
+	}
 
 	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
 	/*
@@ -1941,7 +1945,7 @@ out_fail:
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	lock_kernel();
-	return -EINVAL;
+	return ret;
 }
 
 /*
@@ -1977,8 +1981,8 @@ static journal_t *ext3_get_journal(struct super_block *sb,
 	 * things happen if we iget() an unused inode, as the subsequent
 	 * iput() will try to delete it. */
 
-	journal_inode = iget(sb, journal_inum);
-	if (!journal_inode) {
+	journal_inode = ext3_iget(sb, journal_inum);
+	if (IS_ERR(journal_inode)) {
 		printk(KERN_ERR "EXT3-fs: no journal found.\n");
 		return NULL;
 	}
@@ -1991,7 +1995,7 @@ static journal_t *ext3_get_journal(struct super_block *sb,
 
 	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
 		  journal_inode, journal_inode->i_size);
-	if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) {
+	if (!S_ISREG(journal_inode->i_mode)) {
 		printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
 		iput(journal_inode);
 		return NULL;
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 241c01cb92b2..36c540396377 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -823,7 +823,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 	sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
 	int create, int extend_disksize);
 
-extern void ext3_read_inode (struct inode *);
+extern struct inode *ext3_iget(struct super_block *, unsigned long);
 extern int  ext3_write_inode (struct inode *, int);
 extern int  ext3_setattr (struct dentry *, struct iattr *);
 extern void ext3_delete_inode (struct inode *);
-- 
cgit 


From 1d1fe1ee02b9ac2660995b10e35dd41448fef011 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Feb 2008 00:15:37 -0800
Subject: iget: stop EXT4 from using iget() and read_inode()

Stop the EXT4 filesystem from using iget() and read_inode().  Replace
ext4_read_inode() with ext4_iget(), and call that instead of iget().
ext4_iget() then uses iget_locked() directly and returns a proper error code
instead of an inode in the event of an error.

ext4_fill_super() returns any error incurred when getting the root inode
instead of EINVAL.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Jan Kara <jack@suse.cz>
Cc: <linux-ext4@vger.kernel.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/ialloc.c        | 58 +++++++++++++++++++++++++++++--------------------
 fs/ext4/inode.c         | 25 ++++++++++++++++-----
 fs/ext4/namei.c         | 29 ++++++++-----------------
 fs/ext4/resize.c        |  7 +++---
 fs/ext4/super.c         | 36 ++++++++++++++++--------------
 include/linux/ext4_fs.h |  2 +-
 6 files changed, 87 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 575b5215c808..da18a74b966a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -782,14 +782,15 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 	unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
 	ext4_group_t block_group;
 	int bit;
-	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bitmap_bh;
 	struct inode *inode = NULL;
+	long err = -EIO;
 
 	/* Error cases - e2fsck has already cleaned up for us */
 	if (ino > max_ino) {
 		ext4_warning(sb, __FUNCTION__,
 			     "bad orphan ino %lu!  e2fsck was run?", ino);
-		goto out;
+		goto error;
 	}
 
 	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -798,38 +799,49 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 	if (!bitmap_bh) {
 		ext4_warning(sb, __FUNCTION__,
 			     "inode bitmap error for orphan %lu", ino);
-		goto out;
+		goto error;
 	}
 
 	/* Having the inode bit set should be a 100% indicator that this
 	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
 	 * inodes that were being truncated, so we can't check i_nlink==0.
 	 */
-	if (!ext4_test_bit(bit, bitmap_bh->b_data) ||
-			!(inode = iget(sb, ino)) || is_bad_inode(inode) ||
-			NEXT_ORPHAN(inode) > max_ino) {
-		ext4_warning(sb, __FUNCTION__,
-			     "bad orphan inode %lu!  e2fsck was run?", ino);
-		printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
-		       bit, (unsigned long long)bitmap_bh->b_blocknr,
-		       ext4_test_bit(bit, bitmap_bh->b_data));
-		printk(KERN_NOTICE "inode=%p\n", inode);
-		if (inode) {
-			printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
-			       is_bad_inode(inode));
-			printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
-			       NEXT_ORPHAN(inode));
-			printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
-		}
+	if (!ext4_test_bit(bit, bitmap_bh->b_data))
+		goto bad_orphan;
+
+	inode = ext4_iget(sb, ino);
+	if (IS_ERR(inode))
+		goto iget_failed;
+
+	if (NEXT_ORPHAN(inode) > max_ino)
+		goto bad_orphan;
+	brelse(bitmap_bh);
+	return inode;
+
+iget_failed:
+	err = PTR_ERR(inode);
+	inode = NULL;
+bad_orphan:
+	ext4_warning(sb, __FUNCTION__,
+		     "bad orphan inode %lu!  e2fsck was run?", ino);
+	printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
+	       bit, (unsigned long long)bitmap_bh->b_blocknr,
+	       ext4_test_bit(bit, bitmap_bh->b_data));
+	printk(KERN_NOTICE "inode=%p\n", inode);
+	if (inode) {
+		printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
+		       is_bad_inode(inode));
+		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
+		       NEXT_ORPHAN(inode));
+		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
 		/* Avoid freeing blocks if we got a bad deleted inode */
-		if (inode && inode->i_nlink == 0)
+		if (inode->i_nlink == 0)
 			inode->i_blocks = 0;
 		iput(inode);
-		inode = NULL;
 	}
-out:
 	brelse(bitmap_bh);
-	return inode;
+error:
+	return ERR_PTR(err);
 }
 
 unsigned long ext4_count_free_inodes (struct super_block * sb)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0e9055cf700e..f4e387452246 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2680,21 +2680,31 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
 	}
 }
 
-void ext4_read_inode(struct inode * inode)
+struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 {
 	struct ext4_iloc iloc;
 	struct ext4_inode *raw_inode;
-	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_inode_info *ei;
 	struct buffer_head *bh;
+	struct inode *inode;
+	long ret;
 	int block;
 
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	ei = EXT4_I(inode);
 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
 	ei->i_acl = EXT4_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
 #endif
 	ei->i_block_alloc_info = NULL;
 
-	if (__ext4_get_inode_loc(inode, &iloc, 0))
+	ret = __ext4_get_inode_loc(inode, &iloc, 0);
+	if (ret < 0)
 		goto bad_inode;
 	bh = iloc.bh;
 	raw_inode = ext4_raw_inode(&iloc);
@@ -2720,6 +2730,7 @@ void ext4_read_inode(struct inode * inode)
 		    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
 			/* this inode is deleted */
 			brelse (bh);
+			ret = -ESTALE;
 			goto bad_inode;
 		}
 		/* The only unlinked inodes we let through here have
@@ -2758,6 +2769,7 @@ void ext4_read_inode(struct inode * inode)
 		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
 		    EXT4_INODE_SIZE(inode->i_sb)) {
 			brelse (bh);
+			ret = -EIO;
 			goto bad_inode;
 		}
 		if (ei->i_extra_isize == 0) {
@@ -2811,11 +2823,12 @@ void ext4_read_inode(struct inode * inode)
 	}
 	brelse (iloc.bh);
 	ext4_set_inode_flags(inode);
-	return;
+	unlock_new_inode(inode);
+	return inode;
 
 bad_inode:
-	make_bad_inode(inode);
-	return;
+	iget_failed(inode);
+	return ERR_PTR(ret);
 }
 
 static int ext4_inode_blocks_set(handle_t *handle,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 67b6d8a1ceff..d153bb5922fc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1039,17 +1039,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str
 		if (!ext4_valid_inum(dir->i_sb, ino)) {
 			ext4_error(dir->i_sb, "ext4_lookup",
 				   "bad inode number: %lu", ino);
-			inode = NULL;
-		} else
-			inode = iget(dir->i_sb, ino);
-
-		if (!inode)
-			return ERR_PTR(-EACCES);
-
-		if (is_bad_inode(inode)) {
-			iput(inode);
-			return ERR_PTR(-ENOENT);
+			return ERR_PTR(-EIO);
 		}
+		inode = ext4_iget(dir->i_sb, ino);
+		if (IS_ERR(inode))
+			return ERR_CAST(inode);
 	}
 	return d_splice_alias(inode, dentry);
 }
@@ -1078,18 +1072,13 @@ struct dentry *ext4_get_parent(struct dentry *child)
 	if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
 		ext4_error(child->d_inode->i_sb, "ext4_get_parent",
 			   "bad inode number: %lu", ino);
-		inode = NULL;
-	} else
-		inode = iget(child->d_inode->i_sb, ino);
-
-	if (!inode)
-		return ERR_PTR(-EACCES);
-
-	if (is_bad_inode(inode)) {
-		iput(inode);
-		return ERR_PTR(-ENOENT);
+		return ERR_PTR(-EIO);
 	}
 
+	inode = ext4_iget(child->d_inode->i_sb, ino);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
 	parent = d_alloc_anon(inode);
 	if (!parent) {
 		iput(inode);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 4fbba60816f4..9477a2bd6ff2 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -779,12 +779,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
 		}
-		inode = iget(sb, EXT4_RESIZE_INO);
-		if (!inode || is_bad_inode(inode)) {
+		inode = ext4_iget(sb, EXT4_RESIZE_INO);
+		if (IS_ERR(inode)) {
 			ext4_warning(sb, __FUNCTION__,
 				     "Error opening resize inode");
-			iput(inode);
-			return -ENOENT;
+			return PTR_ERR(inode);
 		}
 	}
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c89bb8797765..93beb865c20d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -777,11 +777,10 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 	 * Currently we don't know the generation for parent directory, so
 	 * a generation of 0 means "accept any"
 	 */
-	inode = iget(sb, ino);
-	if (inode == NULL)
-		return ERR_PTR(-ENOMEM);
-	if (is_bad_inode(inode) ||
-	    (generation && inode->i_generation != generation)) {
+	inode = ext4_iget(sb, ino);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	if (generation && inode->i_generation != generation) {
 		iput(inode);
 		return ERR_PTR(-ESTALE);
 	}
@@ -850,7 +849,6 @@ static struct quotactl_ops ext4_qctl_operations = {
 static const struct super_operations ext4_sops = {
 	.alloc_inode	= ext4_alloc_inode,
 	.destroy_inode	= ext4_destroy_inode,
-	.read_inode	= ext4_read_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
 	.delete_inode	= ext4_delete_inode,
@@ -1805,6 +1803,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	unsigned long journal_devnum = 0;
 	unsigned long def_mount_opts;
 	struct inode *root;
+	int ret = -EINVAL;
 	int blocksize;
 	int db_count;
 	int i;
@@ -2237,19 +2236,24 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	 * so we can safely mount the rest of the filesystem now.
 	 */
 
-	root = iget(sb, EXT4_ROOT_INO);
-	sb->s_root = d_alloc_root(root);
-	if (!sb->s_root) {
+	root = ext4_iget(sb, EXT4_ROOT_INO);
+	if (IS_ERR(root)) {
 		printk(KERN_ERR "EXT4-fs: get root inode failed\n");
-		iput(root);
+		ret = PTR_ERR(root);
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-		dput(sb->s_root);
-		sb->s_root = NULL;
+		iput(root);
 		printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
 		goto failed_mount4;
 	}
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
+		iput(root);
+		ret = -ENOMEM;
+		goto failed_mount4;
+	}
 
 	ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
 
@@ -2330,7 +2334,7 @@ out_fail:
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	lock_kernel();
-	return -EINVAL;
+	return ret;
 }
 
 /*
@@ -2366,8 +2370,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 	 * things happen if we iget() an unused inode, as the subsequent
 	 * iput() will try to delete it. */
 
-	journal_inode = iget(sb, journal_inum);
-	if (!journal_inode) {
+	journal_inode = ext4_iget(sb, journal_inum);
+	if (IS_ERR(journal_inode)) {
 		printk(KERN_ERR "EXT4-fs: no journal found.\n");
 		return NULL;
 	}
@@ -2380,7 +2384,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 
 	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
 		  journal_inode, journal_inode->i_size);
-	if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) {
+	if (!S_ISREG(journal_inode->i_mode)) {
 		printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
 		iput(journal_inode);
 		return NULL;
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 1852313fc7c7..c4f635a4dd25 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -1024,7 +1024,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 				struct buffer_head *bh_result,
 				int create, int extend_disksize);
 
-extern void ext4_read_inode (struct inode *);
+extern struct inode *ext4_iget(struct super_block *, unsigned long);
 extern int  ext4_write_inode (struct inode *, int);
 extern int  ext4_setattr (struct dentry *, struct iattr *);
 extern void ext4_delete_inode (struct inode *);
-- 
cgit 


From 2b7e5bcbd9e03f7236d2869f4261059074ea50a2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Feb 2008 00:15:45 -0800
Subject: iget: stop QNX4 from using iget() and read_inode()

Stop the QNX4 filesystem from using iget() and read_inode().  Replace
qnx4_read_inode() with qnx4_iget(), and call that instead of iget().
qnx4_iget() then uses iget_locked() directly and returns a proper error code
instead of an inode in the event of an error.

qnx4_fill_super() returns any error incurred when getting the root inode
instead of EINVAL.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Anders Larsen <al@alarsen.net>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/qnx4/inode.c         | 47 ++++++++++++++++++++++++++++++++---------------
 fs/qnx4/namei.c         |  8 +++++---
 include/linux/qnx4_fs.h |  1 +
 3 files changed, 38 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 638bdb963213..b31ab78052b3 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -125,7 +125,6 @@ static int qnx4_write_inode(struct inode *inode, int unused)
 static void qnx4_put_super(struct super_block *sb);
 static struct inode *qnx4_alloc_inode(struct super_block *sb);
 static void qnx4_destroy_inode(struct inode *inode);
-static void qnx4_read_inode(struct inode *);
 static int qnx4_remount(struct super_block *sb, int *flags, char *data);
 static int qnx4_statfs(struct dentry *, struct kstatfs *);
 
@@ -133,7 +132,6 @@ static const struct super_operations qnx4_sops =
 {
 	.alloc_inode	= qnx4_alloc_inode,
 	.destroy_inode	= qnx4_destroy_inode,
-	.read_inode	= qnx4_read_inode,
 	.put_super	= qnx4_put_super,
 	.statfs		= qnx4_statfs,
 	.remount_fs	= qnx4_remount,
@@ -357,6 +355,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
 	struct inode *root;
 	const char *errmsg;
 	struct qnx4_sb_info *qs;
+	int ret = -EINVAL;
 
 	qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL);
 	if (!qs)
@@ -396,12 +395,14 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
 	}
 
  	/* does root not have inode number QNX4_ROOT_INO ?? */
- 	root = iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK);
- 	if (!root) {
+	root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK);
+	if (IS_ERR(root)) {
  		printk("qnx4: get inode failed\n");
+		ret = PTR_ERR(root);
  		goto out;
  	}
 
+	ret = -ENOMEM;
  	s->s_root = d_alloc_root(root);
  	if (s->s_root == NULL)
  		goto outi;
@@ -417,7 +418,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
       outnobh:
 	kfree(qs);
 	s->s_fs_info = NULL;
-	return -EINVAL;
+	return ret;
 }
 
 static void qnx4_put_super(struct super_block *sb)
@@ -462,29 +463,38 @@ static const struct address_space_operations qnx4_aops = {
 	.bmap		= qnx4_bmap
 };
 
-static void qnx4_read_inode(struct inode *inode)
+struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
 {
 	struct buffer_head *bh;
 	struct qnx4_inode_entry *raw_inode;
-	int block, ino;
-	struct super_block *sb = inode->i_sb;
-	struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
+	int block;
+	struct qnx4_inode_entry *qnx4_inode;
+	struct inode *inode;
 
-	ino = inode->i_ino;
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	qnx4_inode = qnx4_raw_inode(inode);
 	inode->i_mode = 0;
 
 	QNX4DEBUG(("Reading inode : [%d]\n", ino));
 	if (!ino) {
-		printk("qnx4: bad inode number on dev %s: %d is out of range\n",
+		printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is "
+				"out of range\n",
 		       sb->s_id, ino);
-		return;
+		iget_failed(inode);
+		return ERR_PTR(-EIO);
 	}
 	block = ino / QNX4_INODES_PER_BLOCK;
 
 	if (!(bh = sb_bread(sb, block))) {
 		printk("qnx4: major problem: unable to read inode from dev "
 		       "%s\n", sb->s_id);
-		return;
+		iget_failed(inode);
+		return ERR_PTR(-EIO);
 	}
 	raw_inode = ((struct qnx4_inode_entry *) bh->b_data) +
 	    (ino % QNX4_INODES_PER_BLOCK);
@@ -515,9 +525,16 @@ static void qnx4_read_inode(struct inode *inode)
 		inode->i_op = &page_symlink_inode_operations;
 		inode->i_mapping->a_ops = &qnx4_aops;
 		qnx4_i(inode)->mmu_private = inode->i_size;
-	} else
-		printk("qnx4: bad inode %d on dev %s\n",ino,sb->s_id);
+	} else {
+		printk(KERN_ERR "qnx4: bad inode %lu on dev %s\n",
+			ino, sb->s_id);
+		iget_failed(inode);
+		brelse(bh);
+		return ERR_PTR(-EIO);
+	}
 	brelse(bh);
+	unlock_new_inode(inode);
+	return inode;
 }
 
 static struct kmem_cache *qnx4_inode_cachep;
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 733cdf01d645..775eed3a4085 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -128,10 +128,12 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nam
 	}
 	brelse(bh);
 
-	if ((foundinode = iget(dir->i_sb, ino)) == NULL) {
+	foundinode = qnx4_iget(dir->i_sb, ino);
+	if (IS_ERR(foundinode)) {
 		unlock_kernel();
-		QNX4DEBUG(("qnx4: lookup->iget -> NULL\n"));
-		return ERR_PTR(-EACCES);
+		QNX4DEBUG(("qnx4: lookup->iget -> error %ld\n",
+			   PTR_ERR(foundinode)));
+		return ERR_CAST(foundinode);
 	}
 out:
 	unlock_kernel();
diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index 19bc9b8b6191..34a196ee7941 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -110,6 +110,7 @@ struct qnx4_inode_info {
 	struct inode vfs_inode;
 };
 
+extern struct inode *qnx4_iget(struct super_block *, unsigned long);
 extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
 extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
 extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
-- 
cgit 


From 12debc4248a4a7f1873e47cda2cdd7faca80b099 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Feb 2008 00:15:52 -0800
Subject: iget: remove iget() and the read_inode() super op as being obsolete

Remove the old iget() call and the read_inode() superblock operation it uses
as these are really obsolete, and the use of read_inode() does not produce
proper error handling (no distinction between ENOMEM and EIO when marking an
inode bad).

Furthermore, this removes the temptation to use iget() to find an inode by
number in a filesystem from code outside that filesystem.

iget_locked() should be used instead.  A new function is added in an earlier
patch (iget_failed) that is to be called to mark an inode as bad, unlock it
and release it should the get routine fail.  Mark iget() and read_inode() as
being obsolete and remove references to them from the documentation.

Typically a filesystem will be modified such that the read_inode function
becomes an internal iget function, for example the following:

	void thingyfs_read_inode(struct inode *inode)
	{
		...
	}

would be changed into something like:

	struct inode *thingyfs_iget(struct super_block *sp, unsigned long ino)
	{
		struct inode *inode;
		int ret;

		inode = iget_locked(sb, ino);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		if (!(inode->i_state & I_NEW))
			return inode;

		...
		unlock_new_inode(inode);
		return inode;
	error:
		iget_failed(inode);
		return ERR_PTR(ret);
	}

and then thingyfs_iget() would be called rather than iget(), for example:

	ret = -EINVAL;
	inode = iget(sb, ino);
	if (!inode || is_bad_inode(inode))
		goto error;

becomes:

	inode = thingyfs_iget(sb, ino);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto error;
	}

Note that is_bad_inode() does not need to be called.  The error returned by
thingyfs_iget() should render it unnecessary.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/Locking |  3 ---
 Documentation/filesystems/porting | 12 ++++++------
 Documentation/filesystems/vfs.txt | 17 +++--------------
 fs/inode.c                        |  4 ----
 include/linux/fs.h                | 14 --------------
 5 files changed, 9 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 37c10cba7177..42d4b30b1045 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -90,7 +90,6 @@ of the locking scheme for directory operations.
 prototypes:
 	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
-	void (*read_inode) (struct inode *);
 	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
 	void (*put_inode) (struct inode *);
@@ -114,7 +113,6 @@ locking rules:
 			BKL	s_lock	s_umount
 alloc_inode:		no	no	no
 destroy_inode:		no
-read_inode:		no				(see below)
 dirty_inode:		no				(must not sleep)
 write_inode:		no
 put_inode:		no
@@ -133,7 +131,6 @@ show_options:		no				(vfsmount->sem)
 quota_read:		no	no	no		(see below)
 quota_write:		no	no	no		(see below)
 
-->read_inode() is not a method - it's a callback used in iget().
 ->remount_fs() will have the s_umount lock if it's already mounted.
 When called from get_sb_single, it does NOT have the s_umount lock.
 ->quota_read() and ->quota_write() functions are both guaranteed to
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index fbd3815a5f57..92b888d540a6 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -34,8 +34,8 @@ FOO_I(inode) (see in-tree filesystems for examples).
 
 Make them ->alloc_inode and ->destroy_inode in your super_operations.
 
-Keep in mind that now you need explicit initialization of private data -
-typically in ->read_inode() and after getting an inode from new_inode().
+Keep in mind that now you need explicit initialization of private data
+typically between calling iget_locked() and unlocking the inode.
 
 At some point that will become mandatory.
 
@@ -173,10 +173,10 @@ should be a non-blocking function that initializes those parts of a
 newly created inode to allow the test function to succeed. 'data' is
 passed as an opaque value to both test and set functions.
 
-When the inode has been created by iget5_locked(), it will be returned with
-the I_NEW flag set and will still be locked. read_inode has not been
-called so the file system still has to finalize the initialization. Once
-the inode is initialized it must be unlocked by calling unlock_new_inode().
+When the inode has been created by iget5_locked(), it will be returned with the
+I_NEW flag set and will still be locked.  The filesystem then needs to finalize
+the initialization. Once the inode is initialized it must be unlocked by
+calling unlock_new_inode().
 
 The filesystem is responsible for setting (and possibly testing) i_ino
 when appropriate. There is also a simpler iget_locked function that
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 9d019d35728f..bd55038b56f5 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -203,8 +203,6 @@ struct super_operations {
         struct inode *(*alloc_inode)(struct super_block *sb);
         void (*destroy_inode)(struct inode *);
 
-        void (*read_inode) (struct inode *);
-
         void (*dirty_inode) (struct inode *);
         int (*write_inode) (struct inode *, int);
         void (*put_inode) (struct inode *);
@@ -242,15 +240,6 @@ or bottom half).
   	->alloc_inode was defined and simply undoes anything done by
 	->alloc_inode.
 
-  read_inode: this method is called to read a specific inode from the
-        mounted filesystem.  The i_ino member in the struct inode is
-	initialized by the VFS to indicate which inode to read. Other
-	members are filled in by this method.
-
-	You can set this to NULL and use iget5_locked() instead of iget()
-	to read inodes.  This is necessary for filesystems for which the
-	inode number is not sufficient to identify an inode.
-
   dirty_inode: this method is called by the VFS to mark an inode dirty.
 
   write_inode: this method is called when the VFS needs to write an
@@ -308,9 +297,9 @@ or bottom half).
 
   quota_write: called by the VFS to write to filesystem quota file.
 
-The read_inode() method is responsible for filling in the "i_op"
-field. This is a pointer to a "struct inode_operations" which
-describes the methods that can be performed on individual inodes.
+Whoever sets up the inode is responsible for filling in the "i_op" field. This
+is a pointer to a "struct inode_operations" which describes the methods that
+can be performed on individual inodes.
 
 
 The Inode Object
diff --git a/fs/inode.c b/fs/inode.c
index 276ffd6b6fdd..53245ffcf93d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -928,8 +928,6 @@ EXPORT_SYMBOL(ilookup);
  * @set:	callback used to initialize a new struct inode
  * @data:	opaque data pointer to pass to @test and @set
  *
- * This is iget() without the read_inode() portion of get_new_inode().
- *
  * iget5_locked() uses ifind() to search for the inode specified by @hashval
  * and @data in the inode cache and if present it is returned with an increased
  * reference count. This is a generalized version of iget_locked() for file
@@ -966,8 +964,6 @@ EXPORT_SYMBOL(iget5_locked);
  * @sb:		super block of file system
  * @ino:	inode number to get
  *
- * This is iget() without the read_inode() portion of get_new_inode_fast().
- *
  * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
  * the inode cache and if present it is returned with an increased reference
  * count. This is for file systems where the inode number is sufficient for
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d202600d36bd..36b7abefacbe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1241,8 +1241,6 @@ struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
 
-	void (*read_inode) (struct inode *);
-  
    	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
 	void (*put_inode) (struct inode *);
@@ -1767,18 +1765,6 @@ extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*te
 extern struct inode * iget_locked(struct super_block *, unsigned long);
 extern void unlock_new_inode(struct inode *);
 
-static inline struct inode *iget(struct super_block *sb, unsigned long ino)
-{
-	struct inode *inode = iget_locked(sb, ino);
-	
-	if (inode && (inode->i_state & I_NEW)) {
-		sb->s_op->read_inode(inode);
-		unlock_new_inode(inode);
-	}
-
-	return inode;
-}
-
 extern void __iget(struct inode * inode);
 extern void iget_failed(struct inode *);
 extern void clear_inode(struct inode *);
-- 
cgit 


From c1445db9f72db0537c43a2eab6e1b0f6741162f5 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <k.shutemov@gmail.com>
Date: Thu, 7 Feb 2008 00:15:53 -0800
Subject: Unexport asm/user.h and linux/user.h

Do not export asm/user.h and linux/user.h during make headers_install.

Signed-off-by: Kirill A. Shutemov <k.shutemov@gmail.com>
Reviewed-by: David Woodhouse <dwmw2@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/Kbuild.asm | 1 -
 include/asm-x86/Kbuild         | 2 --
 include/linux/Kbuild           | 1 -
 include/linux/elfcore.h        | 2 ++
 4 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 8fd81713cfc0..1f8c3df1c26d 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -27,7 +27,6 @@ unifdef-y += termbits.h
 unifdef-y += termios.h
 unifdef-y += types.h
 unifdef-y += unistd.h
-unifdef-y += user.h
 
 # These probably shouldn't be exported
 unifdef-y += elf.h
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 3c6f0f80e827..b04a7ff46df1 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -22,7 +22,5 @@ unifdef-y += posix_types_64.h
 unifdef-y += ptrace.h
 unifdef-y += unistd_32.h
 unifdef-y += unistd_64.h
-unifdef-y += user_32.h
-unifdef-y += user_64.h
 unifdef-y += vm86.h
 unifdef-y += vsyscall.h
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 93631229fd5c..6723bc973dac 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -344,7 +344,6 @@ unifdef-y += uinput.h
 unifdef-y += uio.h
 unifdef-y += unistd.h
 unifdef-y += usbdevice_fs.h
-unifdef-y += user.h
 unifdef-y += utsname.h
 unifdef-y += videodev2.h
 unifdef-y += videodev.h
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 9631dddae348..0432de7d4707 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -4,7 +4,9 @@
 #include <linux/types.h>
 #include <linux/signal.h>
 #include <linux/time.h>
+#ifdef __KERNEL__
 #include <linux/user.h>
+#endif
 #include <linux/ptrace.h>
 
 struct elf_siginfo
-- 
cgit 


From 6cc931b9b5ec652c90b928f3ec2163f261552c91 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <k.shutemov@gmail.com>
Date: Thu, 7 Feb 2008 00:15:55 -0800
Subject: Unexport asm/elf.h

Do not export asm/elf.h during make headers_install.

Signed-off-by: Kirill A. Shutemov <k.shutemov@gmail.com>
Reviewed-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/Kbuild.asm | 1 -
 include/linux/Kbuild           | 2 +-
 include/linux/elf.h            | 2 ++
 include/linux/elfcore.h        | 2 ++
 4 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 1f8c3df1c26d..d00fd67e6367 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -29,5 +29,4 @@ unifdef-y += types.h
 unifdef-y += unistd.h
 
 # These probably shouldn't be exported
-unifdef-y += elf.h
 unifdef-y += page.h
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 6723bc973dac..0d4f55d97ae9 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -60,7 +60,6 @@ header-y += dqblk_v2.h
 header-y += dqblk_xfs.h
 header-y += efs_fs_sb.h
 header-y += elf-fdpic.h
-header-y += elf.h
 header-y += elf-em.h
 header-y += fadvise.h
 header-y += fd.h
@@ -190,6 +189,7 @@ unifdef-y += dccp.h
 unifdef-y += dirent.h
 unifdef-y += dlm.h
 unifdef-y += edd.h
+unifdef-y += elf.h
 unifdef-y += elfcore.h
 unifdef-y += errno.h
 unifdef-y += errqueue.h
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 7ceb24d87c1a..ed98c761bae7 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -3,7 +3,9 @@
 
 #include <linux/types.h>
 #include <linux/elf-em.h>
+#ifdef __KERNEL__
 #include <asm/elf.h>
+#endif
 
 struct file;
 
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 0432de7d4707..5ca54d77079f 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -16,7 +16,9 @@ struct elf_siginfo
 	int	si_errno;			/* errno */
 };
 
+#ifdef __KERNEL__
 #include <asm/elf.h>
+#endif
 
 #ifndef __KERNEL__
 typedef elf_greg_t greg_t;
-- 
cgit 


From ed7b1889da256977574663689b598d88950bbd23 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <k.shutemov@gmail.com>
Date: Thu, 7 Feb 2008 00:15:56 -0800
Subject: Unexport asm/page.h

Do not export asm/page.h during make headers_install.  This removes PAGE_SIZE
from userspace headers.

Signed-off-by: Kirill A. Shutemov <k.shutemov@gmail.com>
Reviewed-by: David Woodhouse <dwmw2@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-frv/Kbuild         | 1 -
 include/asm-generic/Kbuild.asm | 3 ---
 include/asm-s390/kexec.h       | 2 ++
 include/linux/Kbuild           | 1 -
 include/linux/a.out.h          | 8 ++++++++
 include/linux/shm.h            | 8 ++++++++
 6 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild
index 966a9836d556..bc3f12c5b7e0 100644
--- a/include/asm-frv/Kbuild
+++ b/include/asm-frv/Kbuild
@@ -4,4 +4,3 @@ header-y += registers.h
 
 unifdef-y += termios.h
 unifdef-y += ptrace.h
-unifdef-y += page.h
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index d00fd67e6367..57ba60635959 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -27,6 +27,3 @@ unifdef-y += termbits.h
 unifdef-y += termios.h
 unifdef-y += types.h
 unifdef-y += unistd.h
-
-# These probably shouldn't be exported
-unifdef-y += page.h
diff --git a/include/asm-s390/kexec.h b/include/asm-s390/kexec.h
index 7592af708b41..f219c6411e0b 100644
--- a/include/asm-s390/kexec.h
+++ b/include/asm-s390/kexec.h
@@ -10,7 +10,9 @@
 #ifndef _S390_KEXEC_H
 #define _S390_KEXEC_H
 
+#ifdef __KERNEL__
 #include <asm/page.h>
+#endif
 #include <asm/processor.h>
 /*
  * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 0d4f55d97ae9..2ebf068ba504 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -18,7 +18,6 @@ header-y += usb/
 
 header-y += affs_hardblocks.h
 header-y += aio_abi.h
-header-y += a.out.h
 header-y += arcfb.h
 header-y += atmapi.h
 header-y += atmbr2684.h
diff --git a/include/linux/a.out.h b/include/linux/a.out.h
index f913cc3e1b0d..82cd918f2ab7 100644
--- a/include/linux/a.out.h
+++ b/include/linux/a.out.h
@@ -128,12 +128,20 @@ enum machine_type {
 #endif
 
 #ifdef linux
+#ifdef __KERNEL__
 #include <asm/page.h>
+#else
+#include <unistd.h>
+#endif
 #if defined(__i386__) || defined(__mc68000__)
 #define SEGMENT_SIZE	1024
 #else
 #ifndef SEGMENT_SIZE
+#ifdef __KERNEL__
 #define SEGMENT_SIZE	PAGE_SIZE
+#else
+#define SEGMENT_SIZE   getpagesize()
+#endif
 #endif
 #endif
 #endif
diff --git a/include/linux/shm.h b/include/linux/shm.h
index eeaed921a1dc..eca6235a46c0 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -3,7 +3,11 @@
 
 #include <linux/ipc.h>
 #include <linux/errno.h>
+#ifdef __KERNEL__
 #include <asm/page.h>
+#else
+#include <unistd.h>
+#endif
 
 /*
  * SHMMAX, SHMMNI and SHMALL are upper limits are defaults which can
@@ -13,7 +17,11 @@
 #define SHMMAX 0x2000000		 /* max shared seg size (bytes) */
 #define SHMMIN 1			 /* min shared seg size (bytes) */
 #define SHMMNI 4096			 /* max num of segs system wide */
+#ifdef __KERNEL__
 #define SHMALL (SHMMAX/PAGE_SIZE*(SHMMNI/16)) /* max shm system wide (pages) */
+#else
+#define SHMALL (SHMMAX/getpagesize()*(SHMMNI/16))
+#endif
 #define SHMSEG SHMMNI			 /* max shared segs per process */
 
 #ifdef __KERNEL__
-- 
cgit 


From 1237a2ef31cf60e01bbecbe198d8c002bbb710db Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 7 Feb 2008 00:16:34 -0800
Subject: Char: char/serial, remove SERIAL_TYPE_NORMAL redefines

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/epca.h          | 1 -
 drivers/char/esp.c           | 3 ---
 drivers/char/ip2/ip2main.c   | 3 ---
 drivers/char/mxser.c         | 3 ---
 drivers/char/serial167.c     | 2 --
 drivers/char/sx.h            | 2 --
 drivers/serial/68328serial.c | 3 ---
 drivers/serial/crisv10.c     | 5 -----
 include/linux/isicom.h       | 2 --
 9 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/epca.h b/drivers/char/epca.h
index a297238cd3ba..3c77c02b5d65 100644
--- a/drivers/char/epca.h
+++ b/drivers/char/epca.h
@@ -77,7 +77,6 @@ static char *board_desc[] =
 #define ON         1
 
 #define FEPTIMEOUT 200000  
-#define SERIAL_TYPE_NORMAL  1
 #define SERIAL_TYPE_INFO    3
 #define EPCA_EVENT_HANGUP   1
 #define EPCA_MAGIC          0x5c6df104L
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 28607763ae64..b5df2dc40491 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -111,9 +111,6 @@ static char serial_version[] __initdata = "2.2";
 
 static struct tty_driver *esp_driver;
 
-/* serial subtype definitions */
-#define SERIAL_TYPE_NORMAL	1
-
 /*
  * Serial driver configuration section.  Here are the various options:
  *
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index 0f49ccf02a7f..b1d6cad84282 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -153,9 +153,6 @@ static char *pcVersion = "1.2.14";
 static char *pcDriver_name   = "ip2";
 static char *pcIpl    		 = "ip2ipl";
 
-/* Serial subtype definitions */
-#define SERIAL_TYPE_NORMAL    1
-
 // cheezy kludge or genius - you decide?
 int ip2_loadmain(int *, int *, unsigned char *, int);
 static unsigned char *Fip_firmware;
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 47420787a017..5d7901096048 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -81,9 +81,6 @@
 #define	MXSER_ERR_IRQ_CONFLIT	-3
 #define	MXSER_ERR_VECTOR	-4
 
-#define SERIAL_TYPE_NORMAL	1
-#define SERIAL_TYPE_CALLOUT	2
-
 #define WAKEUP_CHARS		256
 
 #define UART_MCR_AFE		0x20
diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
index f1497cecffd8..cbf21cc7b56d 100644
--- a/drivers/char/serial167.c
+++ b/drivers/char/serial167.c
@@ -90,8 +90,6 @@
 
 #define STD_COM_FLAGS (0)
 
-#define SERIAL_TYPE_NORMAL  1
-
 static struct tty_driver *cy_serial_driver;
 extern int serial_console;
 static struct cyclades_port *serial_console_info = NULL;
diff --git a/drivers/char/sx.h b/drivers/char/sx.h
index 70d9783c7323..87c2defdead7 100644
--- a/drivers/char/sx.h
+++ b/drivers/char/sx.h
@@ -88,8 +88,6 @@ struct vpd_prom {
 
 #define IS_CF_BOARD(board) (board->flags & (SX_CFISA_BOARD | SX_CFPCI_BOARD))
 
-#define SERIAL_TYPE_NORMAL 1
-
 /* The SI processor clock is required to calculate the cc_int_count register
    value for the SI cards. */
 #define SI_PROCESSOR_CLOCK 25000000
diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c
index 0d99120ab5a2..2b8a410e0959 100644
--- a/drivers/serial/68328serial.c
+++ b/drivers/serial/68328serial.c
@@ -84,9 +84,6 @@ extern wait_queue_head_t keypress_wait;
 
 struct tty_driver *serial_driver;
 
-/* serial subtype definitions */
-#define SERIAL_TYPE_NORMAL	1
- 
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS 256
 
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index a4e23cf47906..383c4e660cd5 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -68,11 +68,6 @@ static char *serial_version = "$Revision: 1.25 $";
 
 struct tty_driver *serial_driver;
 
-/* serial subtype definitions */
-#ifndef SERIAL_TYPE_NORMAL
-#define SERIAL_TYPE_NORMAL	1
-#endif
-
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS 256
 
diff --git a/include/linux/isicom.h b/include/linux/isicom.h
index 45b3d48f0978..8f4c71759d73 100644
--- a/include/linux/isicom.h
+++ b/include/linux/isicom.h
@@ -37,8 +37,6 @@
 #define		BOARD_COUNT	4
 #define		PORT_COUNT	(BOARD_COUNT*16)
 
-#define		SERIAL_TYPE_NORMAL	1
-
 /*   character sizes  */
 
 #define		ISICOM_CS5		0x0000
-- 
cgit 


From d2e7a4b66d762cad383c5469c1e8b6076792ab6a Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 7 Feb 2008 00:16:37 -0800
Subject: Char: esp, remove hangup and wakeup bottomhalves

There is no need to schedule a bottomhalf for either of them. One is fast
and the another schedules a bottomhalf itself.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/esp.c       | 58 +++++-------------------------------------------
 include/linux/hayesesp.h |  4 ----
 2 files changed, 5 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index b5df2dc40491..c01e26d9ee5e 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -242,17 +242,6 @@ static void rs_start(struct tty_struct *tty)
  * -----------------------------------------------------------------------
  */
 
-/*
- * This routine is used by the interrupt handler to schedule
- * processing in the software interrupt portion of the driver.
- */
-static inline void rs_sched_event(struct esp_struct *info,
-				  int event)
-{
-	info->event |= 1 << event;
-	schedule_work(&info->tqueue);
-}
-
 static DEFINE_SPINLOCK(pio_lock);
 
 static inline struct esp_pio_buffer *get_pio_buffer(void)
@@ -474,7 +463,8 @@ static inline void transmit_chars_pio(struct esp_struct *info,
 	}
 
 	if (info->xmit_cnt < WAKEUP_CHARS) {
-		rs_sched_event(info, ESP_EVENT_WRITE_WAKEUP);
+		if (info->tty)
+			tty_wakeup(info->tty);
 
 #ifdef SERIAL_DEBUG_INTR
 		printk("THRE...");
@@ -512,7 +502,8 @@ static inline void transmit_chars_dma(struct esp_struct *info, int num_bytes)
 	info->xmit_tail = (info->xmit_tail + dma_bytes) & (ESP_XMIT_SIZE - 1);
 
 	if (info->xmit_cnt < WAKEUP_CHARS) {
-		rs_sched_event(info, ESP_EVENT_WRITE_WAKEUP);
+		if (info->tty)
+			tty_wakeup(info->tty);
 
 #ifdef SERIAL_DEBUG_INTR
 		printk("THRE...");
@@ -604,7 +595,7 @@ static inline void check_modem_status(struct esp_struct *info)
 #ifdef SERIAL_DEBUG_OPEN
 			printk("scheduling hangup...");
 #endif
-			schedule_work(&info->tqueue_hangup);
+			tty_hangup(info->tty);
 		}
 	}
 }
@@ -720,41 +711,6 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
  * -------------------------------------------------------------------
  */
 
-static void do_softint(struct work_struct *work)
-{
-	struct esp_struct	*info =
-		container_of(work, struct esp_struct, tqueue);
-	struct tty_struct	*tty;
-	
-	tty = info->tty;
-	if (!tty)
-		return;
-
-	if (test_and_clear_bit(ESP_EVENT_WRITE_WAKEUP, &info->event)) {
-		tty_wakeup(tty);
-	}
-}
-
-/*
- * This routine is called from the scheduler tqueue when the interrupt
- * routine has signalled that a hangup has occurred.  The path of
- * hangup processing is:
- *
- * 	serial interrupt routine -> (scheduler tqueue) ->
- * 	do_serial_hangup() -> tty->hangup() -> esp_hangup()
- * 
- */
-static void do_serial_hangup(struct work_struct *work)
-{
-	struct esp_struct	*info =
-		container_of(work, struct esp_struct, tqueue_hangup);
-	struct tty_struct	*tty;
-	
-	tty = info->tty;
-	if (tty)
-		tty_hangup(tty);
-}
-
 /*
  * ---------------------------------------------------------------
  * Low level utility subroutines for the serial driver:  routines to
@@ -2038,7 +1994,6 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
 		tty->driver->flush_buffer(tty);
 	tty_ldisc_flush(tty);
 	tty->closing = 0;
-	info->event = 0;
 	info->tty = NULL;
 
 	if (info->blocked_open) {
@@ -2106,7 +2061,6 @@ static void esp_hangup(struct tty_struct *tty)
 	
 	rs_flush_buffer(tty);
 	shutdown(info);
-	info->event = 0;
 	info->count = 0;
 	info->flags &= ~ASYNC_NORMAL_ACTIVE;
 	info->tty = NULL;
@@ -2492,8 +2446,6 @@ static int __init espserial_init(void)
 		info->magic = ESP_MAGIC;
 		info->close_delay = 5*HZ/10;
 		info->closing_wait = 30*HZ;
-		INIT_WORK(&info->tqueue, do_softint);
-		INIT_WORK(&info->tqueue_hangup, do_serial_hangup);
 		info->config.rx_timeout = rx_timeout;
 		info->config.flow_on = flow_on;
 		info->config.flow_off = flow_off;
diff --git a/include/linux/hayesesp.h b/include/linux/hayesesp.h
index b436be7a7fff..2177ee5b2fe2 100644
--- a/include/linux/hayesesp.h
+++ b/include/linux/hayesesp.h
@@ -71,7 +71,6 @@ struct hayes_esp_config {
 #define ESP_STAT_NEVER_DMA      0x08
 #define ESP_STAT_USE_PIO        0x10
 
-#define ESP_EVENT_WRITE_WAKEUP	0
 #define ESP_MAGIC		0x53ee
 #define ESP_XMIT_SIZE		4096
 
@@ -92,7 +91,6 @@ struct esp_struct {
 	unsigned short		closing_wait2;
 	int			IER; 	/* Interrupt Enable Register */
 	int			MCR; 	/* Modem control register */
-	unsigned long		event;
 	unsigned long		last_active;
 	int			line;
 	int			count;	    /* # of fd on device */
@@ -101,8 +99,6 @@ struct esp_struct {
 	int			xmit_head;
 	int			xmit_tail;
 	int			xmit_cnt;
-	struct work_struct	tqueue;
-	struct work_struct	tqueue_hangup;
 	wait_queue_head_t	open_wait;
 	wait_queue_head_t	close_wait;
 	wait_queue_head_t	delta_msr_wait;
-- 
cgit 


From cfccaeea62f020242e59a992e1f1a60fe7e5694e Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 7 Feb 2008 00:16:38 -0800
Subject: Char: istallion, remove hangup bottomhalf

tty_hangup schedules a work for hangup itself, no need to do it in the driver.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/istallion.c  | 23 +----------------------
 include/linux/istallion.h |  1 -
 2 files changed, 1 insertion(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 1f27be1ec3d4..c645455c3fd1 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -627,7 +627,6 @@ static int	stli_initopen(struct stlibrd *brdp, struct stliport *portp);
 static int	stli_rawopen(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
 static int	stli_rawclose(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
 static int	stli_waitcarrier(struct stlibrd *brdp, struct stliport *portp, struct file *filp);
-static void	stli_dohangup(struct work_struct *);
 static int	stli_setport(struct stliport *portp);
 static int	stli_cmdwait(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
 static void	stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
@@ -1823,25 +1822,6 @@ static void stli_start(struct tty_struct *tty)
 
 /*****************************************************************************/
 
-/*
- *	Scheduler called hang up routine. This is called from the scheduler,
- *	not direct from the driver "poll" routine. We can't call it there
- *	since the real local hangup code will enable/disable the board and
- *	other things that we can't do while handling the poll. Much easier
- *	to deal with it some time later (don't really care when, hangups
- *	aren't that time critical).
- */
-
-static void stli_dohangup(struct work_struct *ugly_api)
-{
-	struct stliport *portp = container_of(ugly_api, struct stliport, tqhangup);
-	if (portp->tty != NULL) {
-		tty_hangup(portp->tty);
-	}
-}
-
-/*****************************************************************************/
-
 /*
  *	Hangup this port. This is pretty much like closing the port, only
  *	a little more brutal. No waiting for data to drain. Shutdown the
@@ -2405,7 +2385,7 @@ static int stli_hostcmd(struct stlibrd *brdp, struct stliport *portp)
 			    ((portp->sigs & TIOCM_CD) == 0)) {
 				if (portp->flags & ASYNC_CHECK_CD) {
 					if (tty)
-						schedule_work(&portp->tqhangup);
+						tty_hangup(tty);
 				}
 			}
 		}
@@ -2733,7 +2713,6 @@ static int stli_initports(struct stlibrd *brdp)
 		portp->baud_base = STL_BAUDBASE;
 		portp->close_delay = STL_CLOSEDELAY;
 		portp->closing_wait = 30 * HZ;
-		INIT_WORK(&portp->tqhangup, stli_dohangup);
 		init_waitqueue_head(&portp->open_wait);
 		init_waitqueue_head(&portp->close_wait);
 		init_waitqueue_head(&portp->raw_wait);
diff --git a/include/linux/istallion.h b/include/linux/istallion.h
index 106a5e85e5c4..5a84fe944b74 100644
--- a/include/linux/istallion.h
+++ b/include/linux/istallion.h
@@ -71,7 +71,6 @@ struct stliport {
 	wait_queue_head_t	open_wait;
 	wait_queue_head_t	close_wait;
 	wait_queue_head_t	raw_wait;
-	struct work_struct	tqhangup;
 	struct asysigs		asig;
 	unsigned long		addr;
 	unsigned long		rxoffset;
-- 
cgit 


From ccfea3c98a10b9d4d49b899616a06594ec976d7d Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 7 Feb 2008 00:16:39 -0800
Subject: Char: stallion, remove bottomhalf

- tty_hangup schedules a bottomhalf itself, tty_wakeup doesn't need it
- call the CD code (part of work handler previously) directly from the code
  (it wakes somebody up or calls tty_hangup at worse)

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/stallion.c  | 68 +++++++++++++++++-------------------------------
 include/linux/stallion.h |  1 -
 2 files changed, 24 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 5050aa5533a2..feac54e32a12 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -145,8 +145,7 @@ static struct stlbrd		*stl_brds[STL_MAXBRDS];
  */
 #define	ASYI_TXBUSY	1
 #define	ASYI_TXLOW	2
-#define	ASYI_DCDCHANGE	3
-#define	ASYI_TXFLOWED	4
+#define	ASYI_TXFLOWED	3
 
 /*
  *	Define an array of board names as printable strings. Handy for
@@ -610,6 +609,23 @@ static const struct file_operations	stl_fsiomem = {
 
 static struct class *stallion_class;
 
+static void stl_cd_change(struct stlport *portp)
+{
+	unsigned int oldsigs = portp->sigs;
+
+	if (!portp->tty)
+		return;
+
+	portp->sigs = stl_getsignals(portp);
+
+	if ((portp->sigs & TIOCM_CD) && ((oldsigs & TIOCM_CD) == 0))
+		wake_up_interruptible(&portp->open_wait);
+
+	if ((oldsigs & TIOCM_CD) && ((portp->sigs & TIOCM_CD) == 0))
+		if (portp->flags & ASYNC_CHECK_CD)
+			tty_hangup(portp->tty);
+}
+
 /*
  *	Check for any arguments passed in on the module load command line.
  */
@@ -1770,41 +1786,6 @@ static int stl_echpci64intr(struct stlbrd *brdp)
 
 /*****************************************************************************/
 
-/*
- *	Service an off-level request for some channel.
- */
-static void stl_offintr(struct work_struct *work)
-{
-	struct stlport		*portp = container_of(work, struct stlport, tqueue);
-	struct tty_struct	*tty;
-	unsigned int		oldsigs;
-
-	pr_debug("stl_offintr(portp=%p)\n", portp);
-
-	if (portp == NULL)
-		return;
-
-	tty = portp->tty;
-	if (tty == NULL)
-		return;
-
-	if (test_bit(ASYI_TXLOW, &portp->istate))
-		tty_wakeup(tty);
-
-	if (test_bit(ASYI_DCDCHANGE, &portp->istate)) {
-		clear_bit(ASYI_DCDCHANGE, &portp->istate);
-		oldsigs = portp->sigs;
-		portp->sigs = stl_getsignals(portp);
-		if ((portp->sigs & TIOCM_CD) && ((oldsigs & TIOCM_CD) == 0))
-			wake_up_interruptible(&portp->open_wait);
-		if ((oldsigs & TIOCM_CD) && ((portp->sigs & TIOCM_CD) == 0))
-			if (portp->flags & ASYNC_CHECK_CD)
-				tty_hangup(tty);	/* FIXME: module removal race here - AKPM */
-	}
-}
-
-/*****************************************************************************/
-
 /*
  *	Initialize all the ports on a panel.
  */
@@ -1840,7 +1821,6 @@ static int __devinit stl_initports(struct stlbrd *brdp, struct stlpanel *panelp)
 		portp->baud_base = STL_BAUDBASE;
 		portp->close_delay = STL_CLOSEDELAY;
 		portp->closing_wait = 30 * HZ;
-		INIT_WORK(&portp->tqueue, stl_offintr);
 		init_waitqueue_head(&portp->open_wait);
 		init_waitqueue_head(&portp->close_wait);
 		portp->stats.brd = portp->brdnr;
@@ -3530,7 +3510,8 @@ static void stl_cd1400txisr(struct stlpanel *panelp, int ioaddr)
 	if ((len == 0) || ((len < STL_TXBUFLOW) &&
 	    (test_bit(ASYI_TXLOW, &portp->istate) == 0))) {
 		set_bit(ASYI_TXLOW, &portp->istate);
-		schedule_work(&portp->tqueue);
+		if (portp->tty)
+			tty_wakeup(portp->tty);
 	}
 
 	if (len == 0) {
@@ -3693,8 +3674,7 @@ static void stl_cd1400mdmisr(struct stlpanel *panelp, int ioaddr)
 	outb((MISR + portp->uartaddr), ioaddr);
 	misr = inb(ioaddr + EREG_DATA);
 	if (misr & MISR_DCD) {
-		set_bit(ASYI_DCDCHANGE, &portp->istate);
-		schedule_work(&portp->tqueue);
+		stl_cd_change(portp);
 		portp->stats.modem++;
 	}
 
@@ -4448,7 +4428,8 @@ static void stl_sc26198txisr(struct stlport *portp)
 	if ((len == 0) || ((len < STL_TXBUFLOW) &&
 	    (test_bit(ASYI_TXLOW, &portp->istate) == 0))) {
 		set_bit(ASYI_TXLOW, &portp->istate);
-		schedule_work(&portp->tqueue); 
+		if (portp->tty)
+			tty_wakeup(portp->tty);
 	}
 
 	if (len == 0) {
@@ -4649,8 +4630,7 @@ static void stl_sc26198otherisr(struct stlport *portp, unsigned int iack)
 	case CIR_SUBCOS:
 		ipr = stl_sc26198getreg(portp, IPR);
 		if (ipr & IPR_DCDCHANGE) {
-			set_bit(ASYI_DCDCHANGE, &portp->istate);
-			schedule_work(&portp->tqueue); 
+			stl_cd_change(portp);
 			portp->stats.modem++;
 		}
 		break;
diff --git a/include/linux/stallion.h b/include/linux/stallion.h
index 94b4a10b912f..0424d75a5aaa 100644
--- a/include/linux/stallion.h
+++ b/include/linux/stallion.h
@@ -95,7 +95,6 @@ struct stlport {
 	struct tty_struct	*tty;
 	wait_queue_head_t	open_wait;
 	wait_queue_head_t	close_wait;
-	struct work_struct	tqueue;
 	comstats_t		stats;
 	struct stlrq		tx;
 };
-- 
cgit 


From 3099bbc59435928fbd1f4ebd835f825bca755bbb Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 7 Feb 2008 00:16:40 -0800
Subject: Char: serial167, remove bottomhalf

- Cy_EVENT_OPEN_WAKEUP is simple wake_up
- Cy_EVENT_HANGUP is wake_up + tty_hangup, which schedules its own work
- Cy_EVENT_WRITE_WAKEUP is tty_wakeup which may be called directly too

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/serial167.c  | 76 +++++------------------------------------------
 include/linux/serial167.h | 14 ---------
 2 files changed, 8 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
index cbf21cc7b56d..df8cd0ca97eb 100644
--- a/drivers/char/serial167.c
+++ b/drivers/char/serial167.c
@@ -357,18 +357,6 @@ static void cy_start(struct tty_struct *tty)
 	local_irq_restore(flags);
 }				/* cy_start */
 
-/*
- * This routine is used by the interrupt handler to schedule
- * processing in the software interrupt portion of the driver
- * (also known as the "bottom half").  This can be called any
- * number of times for any channel without harm.
- */
-static inline void cy_sched_event(struct cyclades_port *info, int event)
-{
-	info->event |= 1 << event;	/* remember what kind of event and who */
-	schedule_work(&info->tqueue);
-}				/* cy_sched_event */
-
 /* The real interrupt service routines are called
    whenever the card wants its hand held--chars
    received, out buffer empty, modem change, etc.
@@ -483,10 +471,12 @@ static irqreturn_t cd2401_modem_interrupt(int irq, void *dev_id)
 		    && (info->flags & ASYNC_CHECK_CD)) {
 			if (mdm_status & CyDCD) {
 /* CP('!'); */
-				cy_sched_event(info, Cy_EVENT_OPEN_WAKEUP);
+				wake_up_interruptible(&info->open_wait);
 			} else {
 /* CP('@'); */
-				cy_sched_event(info, Cy_EVENT_HANGUP);
+				tty_hangup(info->tty);
+				wake_up_interruptible(&info->open_wait);
+				info->flags &= ~ASYNC_NORMAL_ACTIVE;
 			}
 		}
 		if ((mdm_change & CyCTS)
@@ -496,8 +486,7 @@ static irqreturn_t cd2401_modem_interrupt(int irq, void *dev_id)
 					/* !!! cy_start isn't used because... */
 					info->tty->stopped = 0;
 					base_addr[CyIER] |= CyTxMpty;
-					cy_sched_event(info,
-						       Cy_EVENT_WRITE_WAKEUP);
+					tty_wakeup(info->tty);
 				}
 			} else {
 				if (!(mdm_status & CyCTS)) {
@@ -543,9 +532,6 @@ static irqreturn_t cd2401_tx_interrupt(int irq, void *dev_id)
 	info->last_active = jiffies;
 	if (info->tty == 0) {
 		base_addr[CyIER] &= ~(CyTxMpty | CyTxRdy);
-		if (info->xmit_cnt < WAKEUP_CHARS) {
-			cy_sched_event(info, Cy_EVENT_WRITE_WAKEUP);
-		}
 		base_addr[CyTEOIR] = CyNOTRANS;
 		return IRQ_HANDLED;
 	}
@@ -627,9 +613,9 @@ static irqreturn_t cd2401_tx_interrupt(int irq, void *dev_id)
 		}
 	}
 
-	if (info->xmit_cnt < WAKEUP_CHARS) {
-		cy_sched_event(info, Cy_EVENT_WRITE_WAKEUP);
-	}
+	if (info->xmit_cnt < WAKEUP_CHARS)
+		tty_wakeup(info->tty);
+
 	base_addr[CyTEOIR] = (char_count != saved_cnt) ? 0 : CyNOTRANS;
 	return IRQ_HANDLED;
 }				/* cy_tx_interrupt */
@@ -690,49 +676,6 @@ static irqreturn_t cd2401_rx_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }				/* cy_rx_interrupt */
 
-/*
- * This routine is used to handle the "bottom half" processing for the
- * serial driver, known also the "software interrupt" processing.
- * This processing is done at the kernel interrupt level, after the
- * cy#/_interrupt() has returned, BUT WITH INTERRUPTS TURNED ON.  This
- * is where time-consuming activities which can not be done in the
- * interrupt driver proper are done; the interrupt driver schedules
- * them using cy_sched_event(), and they get done here.
- *
- * This is done through one level of indirection--the task queue.
- * When a hardware interrupt service routine wants service by the
- * driver's bottom half, it enqueues the appropriate tq_struct (one
- * per port) to the keventd work queue and sets a request flag
- * that the work queue be processed.
- *
- * Although this may seem unwieldy, it gives the system a way to
- * pass an argument (in this case the pointer to the cyclades_port
- * structure) to the bottom half of the driver.  Previous kernels
- * had to poll every port to see if that port needed servicing.
- */
-static void do_softint(struct work_struct *ugly_api)
-{
-	struct cyclades_port *info =
-	    container_of(ugly_api, struct cyclades_port, tqueue);
-	struct tty_struct *tty;
-
-	tty = info->tty;
-	if (!tty)
-		return;
-
-	if (test_and_clear_bit(Cy_EVENT_HANGUP, &info->event)) {
-		tty_hangup(info->tty);
-		wake_up_interruptible(&info->open_wait);
-		info->flags &= ~ASYNC_NORMAL_ACTIVE;
-	}
-	if (test_and_clear_bit(Cy_EVENT_OPEN_WAKEUP, &info->event)) {
-		wake_up_interruptible(&info->open_wait);
-	}
-	if (test_and_clear_bit(Cy_EVENT_WRITE_WAKEUP, &info->event)) {
-		tty_wakeup(tty);
-	}
-}				/* do_softint */
-
 /* This is called whenever a port becomes active;
    interrupts are enabled and DTR & RTS are turned on.
  */
@@ -1743,7 +1686,6 @@ static void cy_close(struct tty_struct *tty, struct file *filp)
 	if (tty->driver->flush_buffer)
 		tty->driver->flush_buffer(tty);
 	tty_ldisc_flush(tty);
-	info->event = 0;
 	info->tty = NULL;
 	if (info->blocked_open) {
 		if (info->close_delay) {
@@ -2234,7 +2176,6 @@ static int __init serial167_init(void)
 				info->rco = baud_co[DefSpeed] >> 5;	/* Rx CO */
 				info->close_delay = 0;
 				info->x_char = 0;
-				info->event = 0;
 				info->count = 0;
 #ifdef SERIAL_DEBUG_COUNT
 				printk("cyc: %d: setting count to 0\n",
@@ -2243,7 +2184,6 @@ static int __init serial167_init(void)
 				info->blocked_open = 0;
 				info->default_threshold = 0;
 				info->default_timeout = 0;
-				INIT_WORK(&info->tqueue, do_softint);
 				init_waitqueue_head(&info->open_wait);
 				init_waitqueue_head(&info->close_wait);
 				/* info->session */
diff --git a/include/linux/serial167.h b/include/linux/serial167.h
index 71b6df2516a6..59c81b708562 100644
--- a/include/linux/serial167.h
+++ b/include/linux/serial167.h
@@ -37,7 +37,6 @@ struct cyclades_port {
 	int			ignore_status_mask;
 	int			close_delay;
 	int			IER; 	/* Interrupt Enable Register */
-	unsigned long		event;
 	unsigned long		last_active;
 	int			count;	/* # of fd on device */
 	int                     x_char; /* to be pushed out ASAP */
@@ -49,7 +48,6 @@ struct cyclades_port {
 	int			xmit_cnt;
         int                     default_threshold;
         int                     default_timeout;
-	struct work_struct	tqueue;
 	wait_queue_head_t	open_wait;
 	wait_queue_head_t	close_wait;
         struct cyclades_monitor mon;
@@ -67,18 +65,6 @@ struct cyclades_port {
 #define CYGETDEFTIMEOUT         0x435908
 #define CYSETDEFTIMEOUT         0x435909
 
-/*
- * Events are used to schedule things to happen at timer-interrupt
- * time, instead of at cy interrupt time.
- */
-#define Cy_EVENT_READ_PROCESS	0
-#define Cy_EVENT_WRITE_WAKEUP	1
-#define Cy_EVENT_HANGUP		2
-#define Cy_EVENT_BREAK		3
-#define Cy_EVENT_OPEN_WAKEUP	4
-
-
-
 #define CyMaxChipsPerCard 1
 
 /**** cd2401 registers ****/
-- 
cgit 


From 151db1fc23800875c7ac353b106b7dab77061275 Mon Sep 17 00:00:00 2001
From: Tony Breeds <tony@bakeyournoodle.com>
Date: Fri, 8 Feb 2008 09:24:52 +1100
Subject: Fix compilation of powerpc asm-offsets.c with old gcc

Commit ad7f71674ad7c3c4467e48f6ab9e85516dae2720 ("[POWERPC] Use a
sensible default for clock_getres() in the VDSO") corrected the clock
resolution reported by the VDSO clock_getres() but introduced another
problem in that older versions of gcc (gcc-4.0 and earlier) fail to
compile the new code in arch/powerpc/kernel/asm-offsets.c.

This fixes it by introducing a new MONOTONIC_RES_NSEC define in the
generic code which is equivalent to KTIME_MONOTONIC_RES but is just an
integer constant, not a ktime union.

Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/asm-offsets.c | 2 +-
 include/linux/hrtimer.h           | 5 ++++-
 include/linux/ktime.h             | 3 ++-
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e6e49289f788..4b749c416464 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -313,7 +313,7 @@ int main(void)
 	DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
 	DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
 	DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
-	DEFINE(CLOCK_REALTIME_RES, (KTIME_MONOTONIC_RES).tv64);
+	DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
 
 #ifdef CONFIG_BUG
 	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 8371b664b41f..203591e23210 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -225,11 +225,14 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
  * idea of the (in)accuracy of timers. Timer values are rounded up to
  * this resolution values.
  */
-# define KTIME_HIGH_RES		(ktime_t) { .tv64 = 1 }
+# define HIGH_RES_NSEC		1
+# define KTIME_HIGH_RES		(ktime_t) { .tv64 = HIGH_RES_NSEC }
+# define MONOTONIC_RES_NSEC	HIGH_RES_NSEC
 # define KTIME_MONOTONIC_RES	KTIME_HIGH_RES
 
 #else
 
+# define MONOTONIC_RES_NSEC	LOW_RES_NSEC
 # define KTIME_MONOTONIC_RES	KTIME_LOW_RES
 
 /*
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index a6ddec141f96..36c542b70c6d 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -316,7 +316,8 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec)
  * idea of the (in)accuracy of timers. Timer values are rounded up to
  * this resolution values.
  */
-#define KTIME_LOW_RES		(ktime_t){ .tv64 = TICK_NSEC }
+#define LOW_RES_NSEC		TICK_NSEC
+#define KTIME_LOW_RES		(ktime_t){ .tv64 = LOW_RES_NSEC }
 
 /* Get the monotonic time in timespec format: */
 extern void ktime_get_ts(struct timespec *ts);
-- 
cgit 


From d569d5bb3fd96d2907acaddd7c4ea5cb07d02ab8 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 3 Feb 2008 15:40:56 -0600
Subject: [SCSI] enclosure: add support for enclosure services

The enclosure misc device is really just a library providing sysfs
support for physical enclosure devices and their components.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/misc/Kconfig      |   9 +
 drivers/misc/Makefile     |   1 +
 drivers/misc/enclosure.c  | 484 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/enclosure.h | 129 ++++++++++++
 4 files changed, 623 insertions(+)
 create mode 100644 drivers/misc/enclosure.c
 create mode 100644 include/linux/enclosure.h

(limited to 'include/linux')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 78cd33861766..7b5220ca7d7f 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -285,4 +285,13 @@ config INTEL_MENLOW
 
 	  If unsure, say N.
 
+config ENCLOSURE_SERVICES
+	tristate "Enclosure Services"
+	default n
+	help
+	  Provides support for intelligent enclosures (bays which
+	  contain storage devices).  You also need either a host
+	  driver (SCSI/ATA) which supports enclosures
+	  or a SCSI enclosure device (SES) to use these services.
+
 endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 1f41654aae4d..7f13549cc87e 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -20,3 +20,4 @@ obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
 obj-$(CONFIG_FUJITSU_LAPTOP)	+= fujitsu-laptop.o
 obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
 obj-$(CONFIG_INTEL_MENLOW)	+= intel_menlow.o
+obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
new file mode 100644
index 000000000000..6fcb0e96adf4
--- /dev/null
+++ b/drivers/misc/enclosure.c
@@ -0,0 +1,484 @@
+/*
+ * Enclosure Services
+ *
+ * Copyright (C) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
+ *
+**-----------------------------------------------------------------------------
+**
+**  This program is free software; you can redistribute it and/or
+**  modify it under the terms of the GNU General Public License
+**  version 2 as published by the Free Software Foundation.
+**
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+**
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+**-----------------------------------------------------------------------------
+*/
+#include <linux/device.h>
+#include <linux/enclosure.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+
+static LIST_HEAD(container_list);
+static DEFINE_MUTEX(container_list_lock);
+static struct class enclosure_class;
+static struct class enclosure_component_class;
+
+/**
+ * enclosure_find - find an enclosure given a device
+ * @dev:	the device to find for
+ *
+ * Looks through the list of registered enclosures to see
+ * if it can find a match for a device.  Returns NULL if no
+ * enclosure is found. Obtains a reference to the enclosure class
+ * device which must be released with class_device_put().
+ */
+struct enclosure_device *enclosure_find(struct device *dev)
+{
+	struct enclosure_device *edev = NULL;
+
+	mutex_lock(&container_list_lock);
+	list_for_each_entry(edev, &container_list, node) {
+		if (edev->cdev.dev == dev) {
+			class_device_get(&edev->cdev);
+			mutex_unlock(&container_list_lock);
+			return edev;
+		}
+	}
+	mutex_unlock(&container_list_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(enclosure_find);
+
+/**
+ * enclosure_for_each_device - calls a function for each enclosure
+ * @fn:		the function to call
+ * @data:	the data to pass to each call
+ *
+ * Loops over all the enclosures calling the function.
+ *
+ * Note, this function uses a mutex which will be held across calls to
+ * @fn, so it must have non atomic context, and @fn may (although it
+ * should not) sleep or otherwise cause the mutex to be held for
+ * indefinite periods
+ */
+int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *),
+			      void *data)
+{
+	int error = 0;
+	struct enclosure_device *edev;
+
+	mutex_lock(&container_list_lock);
+	list_for_each_entry(edev, &container_list, node) {
+		error = fn(edev, data);
+		if (error)
+			break;
+	}
+	mutex_unlock(&container_list_lock);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(enclosure_for_each_device);
+
+/**
+ * enclosure_register - register device as an enclosure
+ *
+ * @dev:	device containing the enclosure
+ * @components:	number of components in the enclosure
+ *
+ * This sets up the device for being an enclosure.  Note that @dev does
+ * not have to be a dedicated enclosure device.  It may be some other type
+ * of device that additionally responds to enclosure services
+ */
+struct enclosure_device *
+enclosure_register(struct device *dev, const char *name, int components,
+		   struct enclosure_component_callbacks *cb)
+{
+	struct enclosure_device *edev =
+		kzalloc(sizeof(struct enclosure_device) +
+			sizeof(struct enclosure_component)*components,
+			GFP_KERNEL);
+	int err, i;
+
+	BUG_ON(!cb);
+
+	if (!edev)
+		return ERR_PTR(-ENOMEM);
+
+	edev->components = components;
+
+	edev->cdev.class = &enclosure_class;
+	edev->cdev.dev = get_device(dev);
+	edev->cb = cb;
+	snprintf(edev->cdev.class_id, BUS_ID_SIZE, "%s", name);
+	err = class_device_register(&edev->cdev);
+	if (err)
+		goto err;
+
+	for (i = 0; i < components; i++)
+		edev->component[i].number = -1;
+
+	mutex_lock(&container_list_lock);
+	list_add_tail(&edev->node, &container_list);
+	mutex_unlock(&container_list_lock);
+
+	return edev;
+
+ err:
+	put_device(edev->cdev.dev);
+	kfree(edev);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(enclosure_register);
+
+static struct enclosure_component_callbacks enclosure_null_callbacks;
+
+/**
+ * enclosure_unregister - remove an enclosure
+ *
+ * @edev:	the registered enclosure to remove;
+ */
+void enclosure_unregister(struct enclosure_device *edev)
+{
+	int i;
+
+	mutex_lock(&container_list_lock);
+	list_del(&edev->node);
+	mutex_unlock(&container_list_lock);
+
+	for (i = 0; i < edev->components; i++)
+		if (edev->component[i].number != -1)
+			class_device_unregister(&edev->component[i].cdev);
+
+	/* prevent any callbacks into service user */
+	edev->cb = &enclosure_null_callbacks;
+	class_device_unregister(&edev->cdev);
+}
+EXPORT_SYMBOL_GPL(enclosure_unregister);
+
+static void enclosure_release(struct class_device *cdev)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	put_device(cdev->dev);
+	kfree(edev);
+}
+
+static void enclosure_component_release(struct class_device *cdev)
+{
+	if (cdev->dev)
+		put_device(cdev->dev);
+	class_device_put(cdev->parent);
+}
+
+/**
+ * enclosure_component_register - add a particular component to an enclosure
+ * @edev:	the enclosure to add the component
+ * @num:	the device number
+ * @type:	the type of component being added
+ * @name:	an optional name to appear in sysfs (leave NULL if none)
+ *
+ * Registers the component.  The name is optional for enclosures that
+ * give their components a unique name.  If not, leave the field NULL
+ * and a name will be assigned.
+ *
+ * Returns a pointer to the enclosure component or an error.
+ */
+struct enclosure_component *
+enclosure_component_register(struct enclosure_device *edev,
+			     unsigned int number,
+			     enum enclosure_component_type type,
+			     const char *name)
+{
+	struct enclosure_component *ecomp;
+	struct class_device *cdev;
+	int err;
+
+	if (number >= edev->components)
+		return ERR_PTR(-EINVAL);
+
+	ecomp = &edev->component[number];
+
+	if (ecomp->number != -1)
+		return ERR_PTR(-EINVAL);
+
+	ecomp->type = type;
+	ecomp->number = number;
+	cdev = &ecomp->cdev;
+	cdev->parent = class_device_get(&edev->cdev);
+	cdev->class = &enclosure_component_class;
+	if (name)
+		snprintf(cdev->class_id, BUS_ID_SIZE, "%s", name);
+	else
+		snprintf(cdev->class_id, BUS_ID_SIZE, "%u", number);
+
+	err = class_device_register(cdev);
+	if (err)
+		ERR_PTR(err);
+
+	return ecomp;
+}
+EXPORT_SYMBOL_GPL(enclosure_component_register);
+
+/**
+ * enclosure_add_device - add a device as being part of an enclosure
+ * @edev:	the enclosure device being added to.
+ * @num:	the number of the component
+ * @dev:	the device being added
+ *
+ * Declares a real device to reside in slot (or identifier) @num of an
+ * enclosure.  This will cause the relevant sysfs links to appear.
+ * This function may also be used to change a device associated with
+ * an enclosure without having to call enclosure_remove_device() in
+ * between.
+ *
+ * Returns zero on success or an error.
+ */
+int enclosure_add_device(struct enclosure_device *edev, int component,
+			 struct device *dev)
+{
+	struct class_device *cdev;
+
+	if (!edev || component >= edev->components)
+		return -EINVAL;
+
+	cdev = &edev->component[component].cdev;
+
+	class_device_del(cdev);
+	if (cdev->dev)
+		put_device(cdev->dev);
+	cdev->dev = get_device(dev);
+	return class_device_add(cdev);
+}
+EXPORT_SYMBOL_GPL(enclosure_add_device);
+
+/**
+ * enclosure_remove_device - remove a device from an enclosure
+ * @edev:	the enclosure device
+ * @num:	the number of the component to remove
+ *
+ * Returns zero on success or an error.
+ *
+ */
+int enclosure_remove_device(struct enclosure_device *edev, int component)
+{
+	struct class_device *cdev;
+
+	if (!edev || component >= edev->components)
+		return -EINVAL;
+
+	cdev = &edev->component[component].cdev;
+
+	class_device_del(cdev);
+	if (cdev->dev)
+		put_device(cdev->dev);
+	cdev->dev = NULL;
+	return class_device_add(cdev);
+}
+EXPORT_SYMBOL_GPL(enclosure_remove_device);
+
+/*
+ * sysfs pieces below
+ */
+
+static ssize_t enclosure_show_components(struct class_device *cdev, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	return snprintf(buf, 40, "%d\n", edev->components);
+}
+
+static struct class_device_attribute enclosure_attrs[] = {
+	__ATTR(components, S_IRUGO, enclosure_show_components, NULL),
+	__ATTR_NULL
+};
+
+static struct class enclosure_class = {
+	.name			= "enclosure",
+	.owner			= THIS_MODULE,
+	.release		= enclosure_release,
+	.class_dev_attrs	= enclosure_attrs,
+};
+
+static const char *const enclosure_status [] = {
+	[ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported",
+	[ENCLOSURE_STATUS_OK] = "OK",
+	[ENCLOSURE_STATUS_CRITICAL] = "critical",
+	[ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical",
+	[ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable",
+	[ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed",
+	[ENCLOSURE_STATUS_UNKNOWN] = "unknown",
+	[ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable",
+};
+
+static const char *const enclosure_type [] = {
+	[ENCLOSURE_COMPONENT_DEVICE] = "device",
+	[ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
+};
+
+static ssize_t get_component_fault(struct class_device *cdev, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_fault)
+		edev->cb->get_fault(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->fault);
+}
+
+static ssize_t set_component_fault(struct class_device *cdev, const char *buf,
+				   size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_fault)
+		edev->cb->set_fault(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_status(struct class_device *cdev, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_status)
+		edev->cb->get_status(edev, ecomp);
+	return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]);
+}
+
+static ssize_t set_component_status(struct class_device *cdev, const char *buf,
+				   size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int i;
+
+	for (i = 0; enclosure_status[i]; i++) {
+		if (strncmp(buf, enclosure_status[i],
+			    strlen(enclosure_status[i])) == 0 &&
+		    (buf[strlen(enclosure_status[i])] == '\n' ||
+		     buf[strlen(enclosure_status[i])] == '\0'))
+			break;
+	}
+
+	if (enclosure_status[i] && edev->cb->set_status) {
+		edev->cb->set_status(edev, ecomp, i);
+		return count;
+	} else
+		return -EINVAL;
+}
+
+static ssize_t get_component_active(struct class_device *cdev, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_active)
+		edev->cb->get_active(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->active);
+}
+
+static ssize_t set_component_active(struct class_device *cdev, const char *buf,
+				   size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_active)
+		edev->cb->set_active(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_locate(struct class_device *cdev, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_locate)
+		edev->cb->get_locate(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->locate);
+}
+
+static ssize_t set_component_locate(struct class_device *cdev, const char *buf,
+				   size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_locate)
+		edev->cb->set_locate(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_type(struct class_device *cdev, char *buf)
+{
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	return snprintf(buf, 40, "%s\n", enclosure_type[ecomp->type]);
+}
+
+
+static struct class_device_attribute enclosure_component_attrs[] = {
+	__ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
+	       set_component_fault),
+	__ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
+	       set_component_status),
+	__ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
+	       set_component_active),
+	__ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
+	       set_component_locate),
+	__ATTR(type, S_IRUGO, get_component_type, NULL),
+	__ATTR_NULL
+};
+
+static struct class enclosure_component_class =  {
+	.name			= "enclosure_component",
+	.owner			= THIS_MODULE,
+	.class_dev_attrs	= enclosure_component_attrs,
+	.release		= enclosure_component_release,
+};
+
+static int __init enclosure_init(void)
+{
+	int err;
+
+	err = class_register(&enclosure_class);
+	if (err)
+		return err;
+	err = class_register(&enclosure_component_class);
+	if (err)
+		goto err_out;
+
+	return 0;
+ err_out:
+	class_unregister(&enclosure_class);
+
+	return err;
+}
+
+static void __exit enclosure_exit(void)
+{
+	class_unregister(&enclosure_component_class);
+	class_unregister(&enclosure_class);
+}
+
+module_init(enclosure_init);
+module_exit(enclosure_exit);
+
+MODULE_AUTHOR("James Bottomley");
+MODULE_DESCRIPTION("Enclosure Services");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h
new file mode 100644
index 000000000000..a5978f18ca40
--- /dev/null
+++ b/include/linux/enclosure.h
@@ -0,0 +1,129 @@
+/*
+ * Enclosure Services
+ *
+ * Copyright (C) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
+ *
+**-----------------------------------------------------------------------------
+**
+**  This program is free software; you can redistribute it and/or
+**  modify it under the terms of the GNU General Public License
+**  version 2 as published by the Free Software Foundation.
+**
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+**
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+**-----------------------------------------------------------------------------
+*/
+#ifndef _LINUX_ENCLOSURE_H_
+#define _LINUX_ENCLOSURE_H_
+
+#include <linux/device.h>
+#include <linux/list.h>
+
+/* A few generic types ... taken from ses-2 */
+enum enclosure_component_type {
+	ENCLOSURE_COMPONENT_DEVICE = 0x01,
+	ENCLOSURE_COMPONENT_ARRAY_DEVICE = 0x17,
+};
+
+/* ses-2 common element status */
+enum enclosure_status {
+	ENCLOSURE_STATUS_UNSUPPORTED = 0,
+	ENCLOSURE_STATUS_OK,
+	ENCLOSURE_STATUS_CRITICAL,
+	ENCLOSURE_STATUS_NON_CRITICAL,
+	ENCLOSURE_STATUS_UNRECOVERABLE,
+	ENCLOSURE_STATUS_NOT_INSTALLED,
+	ENCLOSURE_STATUS_UNKNOWN,
+	ENCLOSURE_STATUS_UNAVAILABLE,
+};
+
+/* SFF-8485 activity light settings */
+enum enclosure_component_setting {
+	ENCLOSURE_SETTING_DISABLED = 0,
+	ENCLOSURE_SETTING_ENABLED = 1,
+	ENCLOSURE_SETTING_BLINK_A_ON_OFF = 2,
+	ENCLOSURE_SETTING_BLINK_A_OFF_ON = 3,
+	ENCLOSURE_SETTING_BLINK_B_ON_OFF = 6,
+	ENCLOSURE_SETTING_BLINK_B_OFF_ON = 7,
+};
+
+struct enclosure_device;
+struct enclosure_component;
+struct enclosure_component_callbacks {
+	void (*get_status)(struct enclosure_device *,
+			     struct enclosure_component *);
+	int (*set_status)(struct enclosure_device *,
+			  struct enclosure_component *,
+			  enum enclosure_status);
+	void (*get_fault)(struct enclosure_device *,
+			  struct enclosure_component *);
+	int (*set_fault)(struct enclosure_device *,
+			 struct enclosure_component *,
+			 enum enclosure_component_setting);
+	void (*get_active)(struct enclosure_device *,
+			   struct enclosure_component *);
+	int (*set_active)(struct enclosure_device *,
+			  struct enclosure_component *,
+			  enum enclosure_component_setting);
+	void (*get_locate)(struct enclosure_device *,
+			   struct enclosure_component *);
+	int (*set_locate)(struct enclosure_device *,
+			  struct enclosure_component *,
+			  enum enclosure_component_setting);
+};
+
+
+struct enclosure_component {
+	void *scratch;
+	struct class_device cdev;
+	enum enclosure_component_type type;
+	int number;
+	int fault;
+	int active;
+	int locate;
+	enum enclosure_status status;
+};
+
+struct enclosure_device {
+	void *scratch;
+	struct list_head node;
+	struct class_device cdev;
+	struct enclosure_component_callbacks *cb;
+	int components;
+	struct enclosure_component component[0];
+};
+
+static inline struct enclosure_device *
+to_enclosure_device(struct class_device *dev)
+{
+	return container_of(dev, struct enclosure_device, cdev);
+}
+
+static inline struct enclosure_component *
+to_enclosure_component(struct class_device *dev)
+{
+	return container_of(dev, struct enclosure_component, cdev);
+}
+
+struct enclosure_device *
+enclosure_register(struct device *, const char *, int,
+		   struct enclosure_component_callbacks *);
+void enclosure_unregister(struct enclosure_device *);
+struct enclosure_component *
+enclosure_component_register(struct enclosure_device *, unsigned int,
+				 enum enclosure_component_type, const char *);
+int enclosure_add_device(struct enclosure_device *enclosure, int component,
+			 struct device *dev);
+int enclosure_remove_device(struct enclosure_device *enclosure, int component);
+struct enclosure_device *enclosure_find(struct device *dev);
+int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *),
+			      void *data);
+
+#endif /* _LINUX_ENCLOSURE_H_ */
-- 
cgit 


From 7fce084a0b3e2bb8caef919f8f36065953655bb5 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 3 Nov 2007 17:29:20 +0100
Subject: dmi: Let drivers walk the DMI table

Let drivers walk the DMI table for their own needs. Some drivers need
data stored in OEM-specific DMI records for proper operation.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mark M. Hoffman <mhoffman@lightlink.com>
---
 drivers/firmware/dmi_scan.c | 62 +++++++++++++++++++++++++++++++++++----------
 include/linux/dmi.h         |  3 +++
 2 files changed, 51 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index e0bade732376..1412d7bcdbd1 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -43,18 +43,12 @@ static char * __init dmi_string(const struct dmi_header *dm, u8 s)
  *	We have to be cautious here. We have seen BIOSes with DMI pointers
  *	pointing to completely the wrong place for example
  */
-static int __init dmi_table(u32 base, int len, int num,
-			    void (*decode)(const struct dmi_header *))
+static void dmi_table(u8 *buf, int len, int num,
+		      void (*decode)(const struct dmi_header *))
 {
-	u8 *buf, *data;
+	u8 *data = buf;
 	int i = 0;
 
-	buf = dmi_ioremap(base, len);
-	if (buf == NULL)
-		return -1;
-
-	data = buf;
-
 	/*
 	 *	Stop when we see all the items the table claimed to have
 	 *	OR we run off the end of the table (also happens)
@@ -75,7 +69,23 @@ static int __init dmi_table(u32 base, int len, int num,
 		data += 2;
 		i++;
 	}
-	dmi_iounmap(buf, len);
+}
+
+static u32 dmi_base;
+static u16 dmi_len;
+static u16 dmi_num;
+
+static int __init dmi_walk_early(void (*decode)(const struct dmi_header *))
+{
+	u8 *buf;
+
+	buf = dmi_ioremap(dmi_base, dmi_len);
+	if (buf == NULL)
+		return -1;
+
+	dmi_table(buf, dmi_len, dmi_num, decode);
+
+	dmi_iounmap(buf, dmi_len);
 	return 0;
 }
 
@@ -291,9 +301,9 @@ static int __init dmi_present(const char __iomem *p)
 
 	memcpy_fromio(buf, p, 15);
 	if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
-		u16 num = (buf[13] << 8) | buf[12];
-		u16 len = (buf[7] << 8) | buf[6];
-		u32 base = (buf[11] << 24) | (buf[10] << 16) |
+		dmi_num = (buf[13] << 8) | buf[12];
+		dmi_len = (buf[7] << 8) | buf[6];
+		dmi_base = (buf[11] << 24) | (buf[10] << 16) |
 			(buf[9] << 8) | buf[8];
 
 		/*
@@ -305,7 +315,7 @@ static int __init dmi_present(const char __iomem *p)
 			       buf[14] >> 4, buf[14] & 0xF);
 		else
 			printk(KERN_INFO "DMI present.\n");
-		if (dmi_table(base,len, num, dmi_decode) == 0)
+		if (dmi_walk_early(dmi_decode) == 0)
 			return 0;
 	}
 	return 1;
@@ -489,3 +499,27 @@ int dmi_get_year(int field)
 
 	return year;
 }
+
+/**
+ *	dmi_walk - Walk the DMI table and get called back for every record
+ *	@decode: Callback function
+ *
+ *	Returns -1 when the DMI table can't be reached, 0 on success.
+ */
+int dmi_walk(void (*decode)(const struct dmi_header *))
+{
+	u8 *buf;
+
+	if (!dmi_available)
+		return -1;
+
+	buf = ioremap(dmi_base, dmi_len);
+	if (buf == NULL)
+		return -1;
+
+	dmi_table(buf, dmi_len, dmi_num, decode);
+
+	iounmap(buf);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dmi_walk);
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index b1251b2af568..bbc9992ec374 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -79,6 +79,7 @@ extern void dmi_scan_machine(void);
 extern int dmi_get_year(int field);
 extern int dmi_name_in_vendors(const char *str);
 extern int dmi_available;
+extern int dmi_walk(void (*decode)(const struct dmi_header *));
 
 #else
 
@@ -89,6 +90,8 @@ static inline const struct dmi_device * dmi_find_device(int type, const char *na
 static inline int dmi_get_year(int year) { return 0; }
 static inline int dmi_name_in_vendors(const char *s) { return 0; }
 #define dmi_available 0
+static inline int dmi_walk(void (*decode)(const struct dmi_header *))
+	{ return -1; }
 
 #endif
 
-- 
cgit 


From a5955ed27497385cc491ecbe40af45951e830881 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 22 Oct 2007 17:45:08 +0200
Subject: hwmon: (gl518sm) Various cleanups

* Drop history, it doesn't belong there
* Drop unused struct member
* Drop bogus struct member comment
* Drop unused driver ID
* Rename new_client to client
* Drop redundant initializations to 0
* Drop useless cast
* Drop trailing space
* Fix comment
* Drop duplicate comment

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mark M. Hoffman <mhoffman@lightlink.com>
---
 drivers/hwmon/gl518sm.c | 49 +++++++++++++++++++------------------------------
 include/linux/i2c-id.h  |  1 -
 2 files changed, 19 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c
index bb58d9866a37..842252eccdaa 100644
--- a/drivers/hwmon/gl518sm.c
+++ b/drivers/hwmon/gl518sm.c
@@ -30,10 +30,6 @@
  * We did not keep that part of the original driver in the Linux 2.6
  * version, since it was making the driver significantly more complex
  * with no real benefit.
- *
- * History:
- * 2004-01-28  Original port. (Hong-Gunn Chew)
- * 2004-01-31  Code review and approval. (Jean Delvare)
  */
 
 #include <linux/module.h>
@@ -129,7 +125,6 @@ struct gl518_data {
 	u8 voltage_in[4];	/* Register values; [0] = VDD */
 	u8 voltage_min[4];	/* Register values; [0] = VDD */
 	u8 voltage_max[4];	/* Register values; [0] = VDD */
-	u8 iter_voltage_in[4];	/* Register values; [0] = VDD */
 	u8 fan_in[2];
 	u8 fan_min[2];
 	u8 fan_div[2];		/* Register encoding, shifted right */
@@ -138,7 +133,7 @@ struct gl518_data {
 	u8 temp_max;		/* Register values */
 	u8 temp_hyst;		/* Register values */
 	u8 alarms;		/* Register value */
-	u8 alarm_mask;		/* Register value */
+	u8 alarm_mask;
 	u8 beep_mask;		/* Register value */
 	u8 beep_enable;		/* Boolean */
 };
@@ -156,7 +151,6 @@ static struct i2c_driver gl518_driver = {
 	.driver = {
 		.name	= "gl518sm",
 	},
-	.id		= I2C_DRIVERID_GL518,
 	.attach_adapter	= gl518_attach_adapter,
 	.detach_client	= gl518_detach_client,
 };
@@ -391,7 +385,7 @@ static int gl518_attach_adapter(struct i2c_adapter *adapter)
 static int gl518_detect(struct i2c_adapter *adapter, int address, int kind)
 {
 	int i;
-	struct i2c_client *new_client;
+	struct i2c_client *client;
 	struct gl518_data *data;
 	int err = 0;
 
@@ -408,25 +402,24 @@ static int gl518_detect(struct i2c_adapter *adapter, int address, int kind)
 		goto exit;
 	}
 
-	new_client = &data->client;
-	i2c_set_clientdata(new_client, data);
+	client = &data->client;
+	i2c_set_clientdata(client, data);
 
-	new_client->addr = address;
-	new_client->adapter = adapter;
-	new_client->driver = &gl518_driver;
-	new_client->flags = 0;
+	client->addr = address;
+	client->adapter = adapter;
+	client->driver = &gl518_driver;
 
 	/* Now, we do the remaining detection. */
 
 	if (kind < 0) {
-		if ((gl518_read_value(new_client, GL518_REG_CHIP_ID) != 0x80)
-		 || (gl518_read_value(new_client, GL518_REG_CONF) & 0x80))
+		if ((gl518_read_value(client, GL518_REG_CHIP_ID) != 0x80)
+		 || (gl518_read_value(client, GL518_REG_CONF) & 0x80))
 			goto exit_free;
 	}
 
 	/* Determine the chip type. */
 	if (kind <= 0) {
-		i = gl518_read_value(new_client, GL518_REG_REVISION);
+		i = gl518_read_value(client, GL518_REG_REVISION);
 		if (i == 0x00) {
 			kind = gl518sm_r00;
 		} else if (i == 0x80) {
@@ -442,25 +435,24 @@ static int gl518_detect(struct i2c_adapter *adapter, int address, int kind)
 	}
 
 	/* Fill in the remaining client fields */
-	strlcpy(new_client->name, "gl518sm", I2C_NAME_SIZE);
+	strlcpy(client->name, "gl518sm", I2C_NAME_SIZE);
 	data->type = kind;
-	data->valid = 0;
 	mutex_init(&data->update_lock);
 
 	/* Tell the I2C layer a new client has arrived */
-	if ((err = i2c_attach_client(new_client)))
+	if ((err = i2c_attach_client(client)))
 		goto exit_free;
 
 	/* Initialize the GL518SM chip */
 	data->alarm_mask = 0xff;
 	data->voltage_in[0]=data->voltage_in[1]=data->voltage_in[2]=0;
-	gl518_init_client((struct i2c_client *) new_client);
+	gl518_init_client(client);
 
 	/* Register sysfs hooks */
-	if ((err = sysfs_create_group(&new_client->dev.kobj, &gl518_group)))
+	if ((err = sysfs_create_group(&client->dev.kobj, &gl518_group)))
 		goto exit_detach;
 
-	data->hwmon_dev = hwmon_device_register(&new_client->dev);
+	data->hwmon_dev = hwmon_device_register(&client->dev);
 	if (IS_ERR(data->hwmon_dev)) {
 		err = PTR_ERR(data->hwmon_dev);
 		goto exit_remove_files;
@@ -469,9 +461,9 @@ static int gl518_detect(struct i2c_adapter *adapter, int address, int kind)
 	return 0;
 
 exit_remove_files:
-	sysfs_remove_group(&new_client->dev.kobj, &gl518_group);
+	sysfs_remove_group(&client->dev.kobj, &gl518_group);
 exit_detach:
-	i2c_detach_client(new_client);
+	i2c_detach_client(client);
 exit_free:
 	kfree(data);
 exit:
@@ -512,9 +504,9 @@ static int gl518_detach_client(struct i2c_client *client)
 	return 0;
 }
 
-/* Registers 0x07 to 0x0c are word-sized, others are byte-sized 
+/* Registers 0x07 to 0x0c are word-sized, others are byte-sized
    GL518 uses a high-byte first convention, which is exactly opposite to
-   the usual practice. */
+   the SMBus standard. */
 static int gl518_read_value(struct i2c_client *client, u8 reg)
 {
 	if ((reg >= 0x07) && (reg <= 0x0c))
@@ -523,9 +515,6 @@ static int gl518_read_value(struct i2c_client *client, u8 reg)
 		return i2c_smbus_read_byte_data(client, reg);
 }
 
-/* Registers 0x07 to 0x0c are word-sized, others are byte-sized 
-   GL518 uses a high-byte first convention, which is exactly opposite to
-   the usual practice. */
 static int gl518_write_value(struct i2c_client *client, u8 reg, u16 value)
 {
 	if ((reg >= 0x07) && (reg <= 0x0c))
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index f922b060158b..d33e16a3f544 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -100,7 +100,6 @@
    These were originally in sensors.h in the lm_sensors package */
 #define I2C_DRIVERID_LM78 1002
 #define I2C_DRIVERID_LM75 1003
-#define I2C_DRIVERID_GL518 1004
 #define I2C_DRIVERID_EEPROM 1005
 #define I2C_DRIVERID_W83781D 1006
 #define I2C_DRIVERID_LM80 1007
-- 
cgit 


From f28dc2f78bbcbf07b8a084fcd89cf6b4256f0664 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 4 Nov 2007 23:44:23 +0100
Subject: hwmon: (gl520sm) Various cleanups

* Drop trailing spaces
* Drop unused driver ID
* Drop stray backslashes in macros
* Rename new_client to client
* Drop redundant initializations to 0

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mark M. Hoffman <mhoffman@lightlink.com>
---
 drivers/hwmon/gl520sm.c | 63 +++++++++++++++++++++++--------------------------
 include/linux/i2c-id.h  |  1 -
 2 files changed, 30 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c
index 2d39d8fc2389..4fdfe820294b 100644
--- a/drivers/hwmon/gl520sm.c
+++ b/drivers/hwmon/gl520sm.c
@@ -43,9 +43,9 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
 /* Insmod parameters */
 I2C_CLIENT_INSMOD_1(gl520sm);
 
-/* Many GL520 constants specified below 
+/* Many GL520 constants specified below
 One of the inputs can be configured as either temp or voltage.
-That's why _TEMP2 and _IN4 access the same register 
+That's why _TEMP2 and _IN4 access the same register
 */
 
 /* The GL520 registers */
@@ -114,7 +114,6 @@ static struct i2c_driver gl520_driver = {
 	.driver = {
 		.name	= "gl520sm",
 	},
-	.id		= I2C_DRIVERID_GL520,
 	.attach_adapter	= gl520_attach_adapter,
 	.detach_client	= gl520_detach_client,
 };
@@ -194,7 +193,7 @@ sysfs_ro_n(cpu, n, _vid, GL520_REG_VID_INPUT)
 #define sysfs_in(n) \
 sysfs_ro_n(in, n, _input, GL520_REG_IN##n##INPUT) \
 sysfs_rw_n(in, n, _min, GL520_REG_IN##n##_MIN) \
-sysfs_rw_n(in, n, _max, GL520_REG_IN##n##_MAX) \
+sysfs_rw_n(in, n, _max, GL520_REG_IN##n##_MAX)
 
 #define sysfs_fan(n) \
 sysfs_ro_n(fan, n, _input, GL520_REG_FAN_INPUT) \
@@ -202,7 +201,7 @@ sysfs_rw_n(fan, n, _min, GL520_REG_FAN_MIN) \
 sysfs_rw_n(fan, n, _div, GL520_REG_FAN_DIV)
 
 #define sysfs_fan_off(n) \
-sysfs_rw_n(fan, n, _off, GL520_REG_FAN_OFF) \
+sysfs_rw_n(fan, n, _off, GL520_REG_FAN_OFF)
 
 #define sysfs_temp(n) \
 sysfs_ro_n(temp, n, _input, GL520_REG_TEMP##n##_INPUT) \
@@ -477,7 +476,7 @@ static ssize_t set_beep_enable(struct i2c_client *client, struct gl520_data *dat
 static ssize_t set_beep_mask(struct i2c_client *client, struct gl520_data *data, const char *buf, size_t count, int n, int reg)
 {
 	u8 r = simple_strtoul(buf, NULL, 10);
-	
+
 	mutex_lock(&data->update_lock);
 	r &= data->alarm_mask;
 	data->beep_mask = r;
@@ -553,7 +552,7 @@ static int gl520_attach_adapter(struct i2c_adapter *adapter)
 
 static int gl520_detect(struct i2c_adapter *adapter, int address, int kind)
 {
-	struct i2c_client *new_client;
+	struct i2c_client *client;
 	struct gl520_data *data;
 	int err = 0;
 
@@ -570,59 +569,57 @@ static int gl520_detect(struct i2c_adapter *adapter, int address, int kind)
 		goto exit;
 	}
 
-	new_client = &data->client;
-	i2c_set_clientdata(new_client, data);
-	new_client->addr = address;
-	new_client->adapter = adapter;
-	new_client->driver = &gl520_driver;
-	new_client->flags = 0;
+	client = &data->client;
+	i2c_set_clientdata(client, data);
+	client->addr = address;
+	client->adapter = adapter;
+	client->driver = &gl520_driver;
 
 	/* Determine the chip type. */
 	if (kind < 0) {
-		if ((gl520_read_value(new_client, GL520_REG_CHIP_ID) != 0x20) ||
-		    ((gl520_read_value(new_client, GL520_REG_REVISION) & 0x7f) != 0x00) ||
-		    ((gl520_read_value(new_client, GL520_REG_CONF) & 0x80) != 0x00)) {
-			dev_dbg(&new_client->dev, "Unknown chip type, skipping\n");
+		if ((gl520_read_value(client, GL520_REG_CHIP_ID) != 0x20) ||
+		    ((gl520_read_value(client, GL520_REG_REVISION) & 0x7f) != 0x00) ||
+		    ((gl520_read_value(client, GL520_REG_CONF) & 0x80) != 0x00)) {
+			dev_dbg(&client->dev, "Unknown chip type, skipping\n");
 			goto exit_free;
 		}
 	}
 
 	/* Fill in the remaining client fields */
-	strlcpy(new_client->name, "gl520sm", I2C_NAME_SIZE);
-	data->valid = 0;
+	strlcpy(client->name, "gl520sm", I2C_NAME_SIZE);
 	mutex_init(&data->update_lock);
 
 	/* Tell the I2C layer a new client has arrived */
-	if ((err = i2c_attach_client(new_client)))
+	if ((err = i2c_attach_client(client)))
 		goto exit_free;
 
 	/* Initialize the GL520SM chip */
-	gl520_init_client(new_client);
+	gl520_init_client(client);
 
 	/* Register sysfs hooks */
-	if ((err = sysfs_create_group(&new_client->dev.kobj, &gl520_group)))
+	if ((err = sysfs_create_group(&client->dev.kobj, &gl520_group)))
 		goto exit_detach;
 
 	if (data->two_temps) {
-		if ((err = device_create_file(&new_client->dev,
+		if ((err = device_create_file(&client->dev,
 					      &dev_attr_temp2_input))
-		 || (err = device_create_file(&new_client->dev,
+		 || (err = device_create_file(&client->dev,
 					      &dev_attr_temp2_max))
-		 || (err = device_create_file(&new_client->dev,
+		 || (err = device_create_file(&client->dev,
 					      &dev_attr_temp2_max_hyst)))
 			goto exit_remove_files;
 	} else {
-		if ((err = device_create_file(&new_client->dev,
+		if ((err = device_create_file(&client->dev,
 					      &dev_attr_in4_input))
-		 || (err = device_create_file(&new_client->dev,
+		 || (err = device_create_file(&client->dev,
 					      &dev_attr_in4_min))
-		 || (err = device_create_file(&new_client->dev,
+		 || (err = device_create_file(&client->dev,
 					      &dev_attr_in4_max)))
 			goto exit_remove_files;
 	}
 
 
-	data->hwmon_dev = hwmon_device_register(&new_client->dev);
+	data->hwmon_dev = hwmon_device_register(&client->dev);
 	if (IS_ERR(data->hwmon_dev)) {
 		err = PTR_ERR(data->hwmon_dev);
 		goto exit_remove_files;
@@ -631,10 +628,10 @@ static int gl520_detect(struct i2c_adapter *adapter, int address, int kind)
 	return 0;
 
 exit_remove_files:
-	sysfs_remove_group(&new_client->dev.kobj, &gl520_group);
-	sysfs_remove_group(&new_client->dev.kobj, &gl520_group_opt);
+	sysfs_remove_group(&client->dev.kobj, &gl520_group);
+	sysfs_remove_group(&client->dev.kobj, &gl520_group_opt);
 exit_detach:
-	i2c_detach_client(new_client);
+	i2c_detach_client(client);
 exit_free:
 	kfree(data);
 exit:
@@ -697,7 +694,7 @@ static int gl520_detach_client(struct i2c_client *client)
 }
 
 
-/* Registers 0x07 to 0x0c are word-sized, others are byte-sized 
+/* Registers 0x07 to 0x0c are word-sized, others are byte-sized
    GL520 uses a high-byte first convention */
 static int gl520_read_value(struct i2c_client *client, u8 reg)
 {
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index d33e16a3f544..efc74c0fda56 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -109,7 +109,6 @@
 #define I2C_DRIVERID_BT869 1013
 #define I2C_DRIVERID_MAXILIFE 1014
 #define I2C_DRIVERID_MATORB 1015
-#define I2C_DRIVERID_GL520 1016
 #define I2C_DRIVERID_THMC50 1017
 #define I2C_DRIVERID_ADM1025 1020
 #define I2C_DRIVERID_LM87 1021
-- 
cgit 


From 293c09971631d22f8e91402f58955ccaada9dbde Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 30 Nov 2007 23:52:44 +0100
Subject: hwmon: (w83781d) Misc cleanups

* Drop unused defines
* Drop unused driver ID
* Remove trailing whitespace

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mark M. Hoffman <mhoffman@lightlink.com>
---
 drivers/hwmon/w83781d.c | 13 ++++---------
 include/linux/i2c-id.h  |  1 -
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index 8d4d1acbf650..7421f6ea53e1 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -151,10 +151,6 @@ static const u8 BIT_SCFG2[] = { 0x10, 0x20, 0x40 };
 
 #define W83781D_DEFAULT_BETA		3435
 
-/* RT Table registers */
-#define W83781D_REG_RT_IDX		0x50
-#define W83781D_REG_RT_VAL		0x51
-
 /* Conversions */
 #define IN_TO_REG(val)			SENSORS_LIMIT(((val) + 8) / 16, 0, 255)
 #define IN_FROM_REG(val)		((val) * 16)
@@ -269,7 +265,6 @@ static struct i2c_driver w83781d_driver = {
 	.driver = {
 		.name = "w83781d",
 	},
-	.id = I2C_DRIVERID_W83781D,
 	.attach_adapter = w83781d_attach_adapter,
 	.detach_client = w83781d_detach_client,
 };
@@ -694,7 +689,7 @@ store_fan_div(struct device *dev, struct device_attribute *da,
 	unsigned long val = simple_strtoul(buf, NULL, 10);
 
 	mutex_lock(&data->update_lock);
-	
+
 	/* Save fan_min */
 	min = FAN_FROM_REG(data->fan_min[nr],
 			   DIV_FROM_REG(data->fan_div[nr]));
@@ -1000,7 +995,7 @@ ERROR_SC_0:
 #define IN_UNIT_ATTRS(X)					\
 	&sensor_dev_attr_in##X##_input.dev_attr.attr,		\
 	&sensor_dev_attr_in##X##_min.dev_attr.attr,		\
-	&sensor_dev_attr_in##X##_max.dev_attr.attr, 		\
+	&sensor_dev_attr_in##X##_max.dev_attr.attr,		\
 	&sensor_dev_attr_in##X##_alarm.dev_attr.attr,		\
 	&sensor_dev_attr_in##X##_beep.dev_attr.attr
 
@@ -1441,9 +1436,9 @@ w83781d_isa_remove(struct platform_device *pdev)
 }
 
 /* The SMBus locks itself, usually, but nothing may access the Winbond between
-   bank switches. ISA access must always be locked explicitly! 
+   bank switches. ISA access must always be locked explicitly!
    We ignore the W83781D BUSY flag at this moment - it could lead to deadlocks,
-   would slow down the W83781D access and should not be necessary. 
+   would slow down the W83781D access and should not be necessary.
    There are some ugly typecasts here, but the good news is - they should
    nowhere else be necessary! */
 static int
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index efc74c0fda56..dc373e5eb262 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -101,7 +101,6 @@
 #define I2C_DRIVERID_LM78 1002
 #define I2C_DRIVERID_LM75 1003
 #define I2C_DRIVERID_EEPROM 1005
-#define I2C_DRIVERID_W83781D 1006
 #define I2C_DRIVERID_LM80 1007
 #define I2C_DRIVERID_ADM1021 1008
 #define I2C_DRIVERID_ADM9240 1009
-- 
cgit 


From 7b501b1f53605bec17454dd8bbdbbf3f57a7cf32 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 3 Jan 2008 19:44:09 +0100
Subject: hwmon: Discard useless I2C driver IDs

Many I2C hwmon drivers define a driver ID but no other code references
these, meaning that they are useless. Discard them, along with a few
IDs which are defined but never used at all.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mark M. Hoffman <mhoffman@lightlink.com>
---
 drivers/hwmon/adm1021.c     |  1 -
 drivers/hwmon/adm1025.c     |  1 -
 drivers/hwmon/adm9240.c     |  1 -
 drivers/hwmon/asb100.c      |  1 -
 drivers/hwmon/ds1621.c      |  1 -
 drivers/hwmon/fscher.c      |  1 -
 drivers/hwmon/fscpos.c      |  1 -
 drivers/hwmon/lm75.c        |  1 -
 drivers/hwmon/lm78.c        |  1 -
 drivers/hwmon/lm80.c        |  1 -
 drivers/hwmon/lm83.c        |  1 -
 drivers/hwmon/lm85.c        |  1 -
 drivers/hwmon/lm87.c        |  1 -
 drivers/hwmon/lm90.c        |  1 -
 drivers/hwmon/lm92.c        |  1 -
 drivers/hwmon/w83l785ts.c   |  1 -
 drivers/i2c/chips/eeprom.c  |  1 -
 drivers/i2c/chips/pcf8574.c |  1 -
 drivers/i2c/chips/pcf8591.c |  1 -
 include/linux/i2c-id.h      | 33 ---------------------------------
 20 files changed, 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index ebdc6d7db231..b96be772e498 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -115,7 +115,6 @@ static struct i2c_driver adm1021_driver = {
 	.driver = {
 		.name	= "adm1021",
 	},
-	.id		= I2C_DRIVERID_ADM1021,
 	.attach_adapter	= adm1021_attach_adapter,
 	.detach_client	= adm1021_detach_client,
 };
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index c7a365a9e405..33cc58b2fadb 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -123,7 +123,6 @@ static struct i2c_driver adm1025_driver = {
 	.driver = {
 		.name	= "adm1025",
 	},
-	.id		= I2C_DRIVERID_ADM1025,
 	.attach_adapter	= adm1025_attach_adapter,
 	.detach_client	= adm1025_detach_client,
 };
diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c
index c17d0b6b3283..7887192c4236 100644
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -141,7 +141,6 @@ static struct i2c_driver adm9240_driver = {
 	.driver = {
 		.name	= "adm9240",
 	},
-	.id		= I2C_DRIVERID_ADM9240,
 	.attach_adapter	= adm9240_attach_adapter,
 	.detach_client	= adm9240_detach_client,
 };
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index 9460dba4cf74..815493b9788d 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -221,7 +221,6 @@ static struct i2c_driver asb100_driver = {
 	.driver = {
 		.name	= "asb100",
 	},
-	.id		= I2C_DRIVERID_ASB100,
 	.attach_adapter	= asb100_attach_adapter,
 	.detach_client	= asb100_detach_client,
 };
diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c
index b7bd000b130f..3f5163de13c1 100644
--- a/drivers/hwmon/ds1621.c
+++ b/drivers/hwmon/ds1621.c
@@ -94,7 +94,6 @@ static struct i2c_driver ds1621_driver = {
 	.driver = {
 		.name	= "ds1621",
 	},
-	.id		= I2C_DRIVERID_DS1621,
 	.attach_adapter	= ds1621_attach_adapter,
 	.detach_client	= ds1621_detach_client,
 };
diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c
index e67c36953b2d..721c70177b17 100644
--- a/drivers/hwmon/fscher.c
+++ b/drivers/hwmon/fscher.c
@@ -123,7 +123,6 @@ static struct i2c_driver fscher_driver = {
 	.driver = {
 		.name	= "fscher",
 	},
-	.id		= I2C_DRIVERID_FSCHER,
 	.attach_adapter	= fscher_attach_adapter,
 	.detach_client	= fscher_detach_client,
 };
diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c
index 92c9703d0ac0..2f1075323a1e 100644
--- a/drivers/hwmon/fscpos.c
+++ b/drivers/hwmon/fscpos.c
@@ -105,7 +105,6 @@ static struct i2c_driver fscpos_driver = {
 	.driver = {
 		.name	= "fscpos",
 	},
-	.id		= I2C_DRIVERID_FSCPOS,
 	.attach_adapter	= fscpos_attach_adapter,
 	.detach_client	= fscpos_detach_client,
 };
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index 37a8cc032ffa..e5c35a355a57 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -74,7 +74,6 @@ static struct i2c_driver lm75_driver = {
 	.driver = {
 		.name	= "lm75",
 	},
-	.id		= I2C_DRIVERID_LM75,
 	.attach_adapter	= lm75_attach_adapter,
 	.detach_client	= lm75_detach_client,
 };
diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index 934378eb4f8f..0a9eb1f6f4e4 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -168,7 +168,6 @@ static struct i2c_driver lm78_driver = {
 	.driver = {
 		.name	= "lm78",
 	},
-	.id		= I2C_DRIVERID_LM78,
 	.attach_adapter	= lm78_attach_adapter,
 	.detach_client	= lm78_detach_client,
 };
diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c
index 063cdba00a88..0bde0a382ef1 100644
--- a/drivers/hwmon/lm80.c
+++ b/drivers/hwmon/lm80.c
@@ -147,7 +147,6 @@ static struct i2c_driver lm80_driver = {
 	.driver = {
 		.name	= "lm80",
 	},
-	.id		= I2C_DRIVERID_LM80,
 	.attach_adapter	= lm80_attach_adapter,
 	.detach_client	= lm80_detach_client,
 };
diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 0336b4572a61..6e8903a6e902 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -133,7 +133,6 @@ static struct i2c_driver lm83_driver = {
 	.driver = {
 		.name	= "lm83",
 	},
-	.id		= I2C_DRIVERID_LM83,
 	.attach_adapter	= lm83_attach_adapter,
 	.detach_client	= lm83_detach_client,
 };
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 9f1c6f1ee035..43212db7c798 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -367,7 +367,6 @@ static struct i2c_driver lm85_driver = {
 	.driver = {
 		.name   = "lm85",
 	},
-	.id             = I2C_DRIVERID_LM85,
 	.attach_adapter = lm85_attach_adapter,
 	.detach_client  = lm85_detach_client,
 };
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index 3ab4c3f0efdd..7bedf7455e9a 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -171,7 +171,6 @@ static struct i2c_driver lm87_driver = {
 	.driver = {
 		.name	= "lm87",
 	},
-	.id		= I2C_DRIVERID_LM87,
 	.attach_adapter	= lm87_attach_adapter,
 	.detach_client	= lm87_detach_client,
 };
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 116093d0eb3e..f7ec95bedbf6 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -204,7 +204,6 @@ static struct i2c_driver lm90_driver = {
 	.driver = {
 		.name	= "lm90",
 	},
-	.id		= I2C_DRIVERID_LM90,
 	.attach_adapter	= lm90_attach_adapter,
 	.detach_client	= lm90_detach_client,
 };
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index 61d1bd1d5b6e..af5c77d568fe 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -428,7 +428,6 @@ static struct i2c_driver lm92_driver = {
 	.driver = {
 		.name	= "lm92",
 	},
-	.id		= I2C_DRIVERID_LM92,
 	.attach_adapter	= lm92_attach_adapter,
 	.detach_client	= lm92_detach_client,
 };
diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c
index b5db354e2f19..1d6259d29e74 100644
--- a/drivers/hwmon/w83l785ts.c
+++ b/drivers/hwmon/w83l785ts.c
@@ -96,7 +96,6 @@ static struct i2c_driver w83l785ts_driver = {
 	.driver = {
 		.name	= "w83l785ts",
 	},
-	.id		= I2C_DRIVERID_W83L785TS,
 	.attach_adapter	= w83l785ts_attach_adapter,
 	.detach_client	= w83l785ts_detach_client,
 };
diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c
index fde297b21ad7..7dee001e5133 100644
--- a/drivers/i2c/chips/eeprom.c
+++ b/drivers/i2c/chips/eeprom.c
@@ -71,7 +71,6 @@ static struct i2c_driver eeprom_driver = {
 	.driver = {
 		.name	= "eeprom",
 	},
-	.id		= I2C_DRIVERID_EEPROM,
 	.attach_adapter	= eeprom_attach_adapter,
 	.detach_client	= eeprom_detach_client,
 };
diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c
index b3b830ccf209..e5b31329b56e 100644
--- a/drivers/i2c/chips/pcf8574.c
+++ b/drivers/i2c/chips/pcf8574.c
@@ -67,7 +67,6 @@ static struct i2c_driver pcf8574_driver = {
 	.driver = {
 		.name	= "pcf8574",
 	},
-	.id		= I2C_DRIVERID_PCF8574,
 	.attach_adapter	= pcf8574_attach_adapter,
 	.detach_client	= pcf8574_detach_client,
 };
diff --git a/drivers/i2c/chips/pcf8591.c b/drivers/i2c/chips/pcf8591.c
index 865f4409c06b..66c7c3bb9429 100644
--- a/drivers/i2c/chips/pcf8591.c
+++ b/drivers/i2c/chips/pcf8591.c
@@ -92,7 +92,6 @@ static struct i2c_driver pcf8591_driver = {
 	.driver = {
 		.name	= "pcf8591",
 	},
-	.id		= I2C_DRIVERID_PCF8591,
 	.attach_adapter	= pcf8591_attach_adapter,
 	.detach_client	= pcf8591_detach_client,
 };
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index dc373e5eb262..b979112f74e0 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -96,39 +96,6 @@
 
 #define I2C_DRIVERID_I2CDEV	900
 
-/* IDs --   Use DRIVERIDs 1000-1999 for sensors.
-   These were originally in sensors.h in the lm_sensors package */
-#define I2C_DRIVERID_LM78 1002
-#define I2C_DRIVERID_LM75 1003
-#define I2C_DRIVERID_EEPROM 1005
-#define I2C_DRIVERID_LM80 1007
-#define I2C_DRIVERID_ADM1021 1008
-#define I2C_DRIVERID_ADM9240 1009
-#define I2C_DRIVERID_LTC1710 1010
-#define I2C_DRIVERID_BT869 1013
-#define I2C_DRIVERID_MAXILIFE 1014
-#define I2C_DRIVERID_MATORB 1015
-#define I2C_DRIVERID_THMC50 1017
-#define I2C_DRIVERID_ADM1025 1020
-#define I2C_DRIVERID_LM87 1021
-#define I2C_DRIVERID_PCF8574 1022
-#define I2C_DRIVERID_MTP008 1023
-#define I2C_DRIVERID_DS1621 1024
-#define I2C_DRIVERID_ADM1024 1025
-#define I2C_DRIVERID_CH700X 1027 /* single driver for CH7003-7009 digital pc to tv encoders */
-#define I2C_DRIVERID_FSCPOS 1028
-#define I2C_DRIVERID_FSCSCY 1029
-#define I2C_DRIVERID_PCF8591 1030
-#define I2C_DRIVERID_LM92 1033
-#define I2C_DRIVERID_SMARTBATT 1035
-#define I2C_DRIVERID_BMCSENSORS 1036
-#define I2C_DRIVERID_FS451 1037
-#define I2C_DRIVERID_LM85 1039
-#define I2C_DRIVERID_LM83 1040
-#define I2C_DRIVERID_LM90 1042
-#define I2C_DRIVERID_ASB100 1043
-#define I2C_DRIVERID_FSCHER 1046
-#define I2C_DRIVERID_W83L785TS 1047
 #define I2C_DRIVERID_OV7670 1048	/* Omnivision 7670 camera */
 
 /*
-- 
cgit 


From 683d0baad3d6e18134927f8c28ee804dbe10fe71 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 7 Jan 2008 23:20:29 -0800
Subject: SLUB: Use unique end pointer for each slab page.

We use a NULL pointer on freelists to signal that there are no more objects.
However the NULL pointers of all slabs match in contrast to the pointers to
the real objects which are in different ranges for different slab pages.

Change the end pointer to be a pointer to the first object and set bit 0.
Every slab will then have a different end pointer. This is necessary to ensure
that end markers can be matched to the source slab during cmpxchg_local.

Bring back the use of the mapping field by SLUB since we would otherwise have
to call a relatively expensive function page_address() in __slab_alloc().  Use
of the mapping field allows avoiding a call to page_address() in various other
functions as well.

There is no need to change the page_mapping() function since bit 0 is set on
the mapping as also for anonymous pages.  page_mapping(slab_page) will
therefore still return NULL although the mapping field is overloaded.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h |  5 +++-
 mm/slub.c                | 70 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 51 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 34023c65d466..bfee0bd1d435 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -64,7 +64,10 @@ struct page {
 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
 	    spinlock_t ptl;
 #endif
-	    struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+	    struct {
+		   struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		   void *end;			/* SLUB: end marker */
+	    };
 	    struct page *first_page;	/* Compound tail pages */
 	};
 	union {
diff --git a/mm/slub.c b/mm/slub.c
index 2dacaf519c4d..5995626e0cf1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -280,15 +280,32 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
 #endif
 }
 
+/*
+ * The end pointer in a slab is special. It points to the first object in the
+ * slab but has bit 0 set to mark it.
+ *
+ * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
+ * in the mapping set.
+ */
+static inline int is_end(void *addr)
+{
+	return (unsigned long)addr & PAGE_MAPPING_ANON;
+}
+
+void *slab_address(struct page *page)
+{
+	return page->end - PAGE_MAPPING_ANON;
+}
+
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
 	void *base;
 
-	if (!object)
+	if (object == page->end)
 		return 1;
 
-	base = page_address(page);
+	base = slab_address(page);
 	if (object < base || object >= base + s->objects * s->size ||
 		(object - base) % s->size) {
 		return 0;
@@ -321,7 +338,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
 
 /* Scan freelist */
 #define for_each_free_object(__p, __s, __free) \
-	for (__p = (__free); __p; __p = get_freepointer((__s), __p))
+	for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
+		__p))
 
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -473,7 +491,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 {
 	unsigned int off;	/* Offset of last byte */
-	u8 *addr = page_address(page);
+	u8 *addr = slab_address(page);
 
 	print_tracking(s, p);
 
@@ -651,7 +669,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 	if (!(s->flags & SLAB_POISON))
 		return 1;
 
-	start = page_address(page);
+	start = slab_address(page);
 	end = start + (PAGE_SIZE << s->order);
 	length = s->objects * s->size;
 	remainder = end - (start + length);
@@ -718,7 +736,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
 		 * of the free objects in this slab. May cause
 		 * another error because the object count is now wrong.
 		 */
-		set_freepointer(s, p, NULL);
+		set_freepointer(s, p, page->end);
 		return 0;
 	}
 	return 1;
@@ -752,18 +770,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 	void *fp = page->freelist;
 	void *object = NULL;
 
-	while (fp && nr <= s->objects) {
+	while (fp != page->end && nr <= s->objects) {
 		if (fp == search)
 			return 1;
 		if (!check_valid_pointer(s, page, fp)) {
 			if (object) {
 				object_err(s, page, object,
 					"Freechain corrupt");
-				set_freepointer(s, object, NULL);
+				set_freepointer(s, object, page->end);
 				break;
 			} else {
 				slab_err(s, page, "Freepointer corrupt");
-				page->freelist = NULL;
+				page->freelist = page->end;
 				page->inuse = s->objects;
 				slab_fix(s, "Freelist cleared");
 				return 0;
@@ -869,7 +887,7 @@ bad:
 		 */
 		slab_fix(s, "Marking all objects used");
 		page->inuse = s->objects;
-		page->freelist = NULL;
+		page->freelist = page->end;
 	}
 	return 0;
 }
@@ -910,7 +928,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
 	}
 
 	/* Special debug activities for freeing objects */
-	if (!SlabFrozen(page) && !page->freelist)
+	if (!SlabFrozen(page) && page->freelist == page->end)
 		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
@@ -1102,6 +1120,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		SetSlabDebug(page);
 
 	start = page_address(page);
+	page->end = start + 1;
 
 	if (unlikely(s->flags & SLAB_POISON))
 		memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1113,7 +1132,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		last = p;
 	}
 	setup_object(s, page, last);
-	set_freepointer(s, last, NULL);
+	set_freepointer(s, last, page->end);
 
 	page->freelist = start;
 	page->inuse = 0;
@@ -1129,7 +1148,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		void *p;
 
 		slab_pad_check(s, page);
-		for_each_object(p, s, page_address(page))
+		for_each_object(p, s, slab_address(page))
 			check_object(s, page, p, 0);
 		ClearSlabDebug(page);
 	}
@@ -1139,6 +1158,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
+	page->mapping = NULL;
 	__free_pages(page, s->order);
 }
 
@@ -1341,7 +1361,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
-		if (page->freelist)
+		if (page->freelist != page->end)
 			add_partial(n, page, tail);
 		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
 			add_full(n, page);
@@ -1377,8 +1397,12 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
+	 *
+	 * We need to use _is_end here because deactivate slab may
+	 * be called for a debug slab. Then c->freelist may contain
+	 * a dummy pointer.
 	 */
-	while (unlikely(c->freelist)) {
+	while (unlikely(!is_end(c->freelist))) {
 		void **object;
 
 		tail = 0;	/* Hot objects. Put the slab first */
@@ -1478,7 +1502,7 @@ static void *__slab_alloc(struct kmem_cache *s,
 		goto another_slab;
 load_freelist:
 	object = c->page->freelist;
-	if (unlikely(!object))
+	if (unlikely(object == c->page->end))
 		goto another_slab;
 	if (unlikely(SlabDebug(c->page)))
 		goto debug;
@@ -1486,7 +1510,7 @@ load_freelist:
 	object = c->page->freelist;
 	c->freelist = object[c->offset];
 	c->page->inuse = s->objects;
-	c->page->freelist = NULL;
+	c->page->freelist = c->page->end;
 	c->node = page_to_nid(c->page);
 	slab_unlock(c->page);
 	return object;
@@ -1550,7 +1574,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	if (unlikely(is_end(c->freelist) || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
@@ -1614,7 +1638,7 @@ checks_ok:
 	 * was not on the partial list before
 	 * then add it.
 	 */
-	if (unlikely(!prior))
+	if (unlikely(prior == page->end))
 		add_partial(get_node(s, page_to_nid(page)), page, 1);
 
 out_unlock:
@@ -1622,7 +1646,7 @@ out_unlock:
 	return;
 
 slab_empty:
-	if (prior)
+	if (prior != page->end)
 		/*
 		 * Slab still on the partial list.
 		 */
@@ -1842,7 +1866,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
 			struct kmem_cache_cpu *c)
 {
 	c->page = NULL;
-	c->freelist = NULL;
+	c->freelist = (void *)PAGE_MAPPING_ANON;
 	c->node = 0;
 	c->offset = s->offset / sizeof(void *);
 	c->objsize = s->objsize;
@@ -3105,7 +3129,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
 {
 	void *p;
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 
 	if (!check_slab(s, page) ||
 			!on_freelist(s, page, NULL))
@@ -3385,7 +3409,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 static void process_slab(struct loc_track *t, struct kmem_cache *s,
 		struct page *page, enum track_item alloc)
 {
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 	DECLARE_BITMAP(map, s->objects);
 	void *p;
 
-- 
cgit 


From 8ff12cfc009a2a38d87fa7058226fe197bb2696f Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Thu, 7 Feb 2008 17:47:41 -0800
Subject: SLUB: Support for performance statistics

The statistics provided here allow the monitoring of allocator behavior but
at the cost of some (minimal) loss of performance. Counters are placed in
SLUB's per cpu data structure. The per cpu structure may be extended by the
statistics to grow larger than one cacheline which will increase the cache
footprint of SLUB.

There is a compile option to enable/disable the inclusion of the runtime
statistics and its off by default.

The slabinfo tool is enhanced to support these statistics via two options:

-D 	Switches the line of information displayed for a slab from size
	mode to activity mode.

-A	Sorts the slabs displayed by activity. This allows the display of
	the slabs most important to the performance of a certain load.

-r	Report option will report detailed statistics on

Example (tbench load):

slabinfo -AD		->Shows the most active slabs

Name                   Objects    Alloc     Free   %Fast
skbuff_fclone_cache         33 111953835 111953835  99  99
:0000192                  2666  5283688  5281047  99  99
:0001024                   849  5247230  5246389  83  83
vm_area_struct            1349   119642   118355  91  22
:0004096                    15    66753    66751  98  98
:0000064                  2067    25297    23383  98  78
dentry                   10259    28635    18464  91  45
:0000080                 11004    18950     8089  98  98
:0000096                  1703    12358    10784  99  98
:0000128                   762    10582     9875  94  18
:0000512                   184     9807     9647  95  81
:0002048                   479     9669     9195  83  65
anon_vma                   777     9461     9002  99  71
kmalloc-8                 6492     9981     5624  99  97
:0000768                   258     7174     6931  58  15

So the skbuff_fclone_cache is of highest importance for the tbench load.
Pretty high load on the 192 sized slab. Look for the aliases

slabinfo -a | grep 000192
:0000192     <- xfs_btree_cur filp kmalloc-192 uid_cache tw_sock_TCP
	request_sock_TCPv6 tw_sock_TCPv6 skbuff_head_cache xfs_ili

Likely skbuff_head_cache.


Looking into the statistics of the skbuff_fclone_cache is possible through

slabinfo skbuff_fclone_cache	->-r option implied if cache name is mentioned


.... Usual output ...

Slab Perf Counter       Alloc     Free %Al %Fr
--------------------------------------------------
Fastpath             111953360 111946981  99  99
Slowpath                 1044     7423   0   0
Page Alloc                272      264   0   0
Add partial                25      325   0   0
Remove partial             86      264   0   0
RemoteObj/SlabFrozen      350     4832   0   0
Total                111954404 111954404

Flushes       49 Refill        0
Deactivate Full=325(92%) Empty=0(0%) ToHead=24(6%) ToTail=1(0%)

Looks good because the fastpath is overwhelmingly taken.


skbuff_head_cache:

Slab Perf Counter       Alloc     Free %Al %Fr
--------------------------------------------------
Fastpath              5297262  5259882  99  99
Slowpath                 4477    39586   0   0
Page Alloc                937      824   0   0
Add partial                 0     2515   0   0
Remove partial           1691      824   0   0
RemoteObj/SlabFrozen     2621     9684   0   0
Total                 5301739  5299468

Deactivate Full=2620(100%) Empty=0(0%) ToHead=0(0%) ToTail=0(0%)


Descriptions of the output:

Total:		The total number of allocation and frees that occurred for a
		slab

Fastpath:	The number of allocations/frees that used the fastpath.

Slowpath:	Other allocations

Page Alloc:	Number of calls to the page allocator as a result of slowpath
		processing

Add Partial:	Number of slabs added to the partial list through free or
		alloc (occurs during cpuslab flushes)

Remove Partial:	Number of slabs removed from the partial list as a result of
		allocations retrieving a partial slab or by a free freeing
		the last object of a slab.

RemoteObj/Froz:	How many times were remotely freed object encountered when a
		slab was about to be deactivated. Frozen: How many times was
		free able to skip list processing because the slab was in use
		as the cpuslab of another processor.

Flushes:	Number of times the cpuslab was flushed on request
		(kmem_cache_shrink, may result from races in __slab_alloc)

Refill:		Number of times we were able to refill the cpuslab from
		remotely freed objects for the same slab.

Deactivate:	Statistics how slabs were deactivated. Shows how they were
		put onto the partial list.

In general fastpath is very good. Slowpath without partial list processing is
also desirable. Any touching of partial list uses node specific locks which
may potentially cause list lock contention.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 Documentation/vm/slabinfo.c | 149 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/slub_def.h    |  23 +++++++
 lib/Kconfig.debug           |  13 ++++
 mm/slub.c                   | 127 ++++++++++++++++++++++++++++++++++---
 4 files changed, 293 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 488c1f31b992..7123fee708ca 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -32,6 +32,13 @@ struct slabinfo {
 	int sanity_checks, slab_size, store_user, trace;
 	int order, poison, reclaim_account, red_zone;
 	unsigned long partial, objects, slabs;
+	unsigned long alloc_fastpath, alloc_slowpath;
+	unsigned long free_fastpath, free_slowpath;
+	unsigned long free_frozen, free_add_partial, free_remove_partial;
+	unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
+	unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
+	unsigned long deactivate_to_head, deactivate_to_tail;
+	unsigned long deactivate_remote_frees;
 	int numa[MAX_NODES];
 	int numa_partial[MAX_NODES];
 } slabinfo[MAX_SLABS];
@@ -64,8 +71,10 @@ int show_inverted = 0;
 int show_single_ref = 0;
 int show_totals = 0;
 int sort_size = 0;
+int sort_active = 0;
 int set_debug = 0;
 int show_ops = 0;
+int show_activity = 0;
 
 /* Debug options */
 int sanity = 0;
@@ -93,8 +102,10 @@ void usage(void)
 	printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n"
 		"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
 		"-a|--aliases           Show aliases\n"
+		"-A|--activity          Most active slabs first\n"
 		"-d<options>|--debug=<options> Set/Clear Debug options\n"
-		"-e|--empty		Show empty slabs\n"
+		"-D|--display-active    Switch line format to activity\n"
+		"-e|--empty             Show empty slabs\n"
 		"-f|--first-alias       Show first alias\n"
 		"-h|--help              Show usage information\n"
 		"-i|--inverted          Inverted list\n"
@@ -281,8 +292,11 @@ int line = 0;
 
 void first_line(void)
 {
-	printf("Name                   Objects Objsize    Space "
-		"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
+	if (show_activity)
+		printf("Name                   Objects    Alloc     Free   %%Fast\n");
+	else
+		printf("Name                   Objects Objsize    Space "
+			"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
 }
 
 /*
@@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s)
 	return 	s->slabs * (page_size << s->order);
 }
 
+unsigned long slab_activity(struct slabinfo *s)
+{
+	return 	s->alloc_fastpath + s->free_fastpath +
+		s->alloc_slowpath + s->free_slowpath;
+}
+
 void slab_numa(struct slabinfo *s, int mode)
 {
 	int node;
@@ -392,6 +412,71 @@ const char *onoff(int x)
 	return "Off";
 }
 
+void slab_stats(struct slabinfo *s)
+{
+	unsigned long total_alloc;
+	unsigned long total_free;
+	unsigned long total;
+
+	if (!s->alloc_slab)
+		return;
+
+	total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+	total_free = s->free_fastpath + s->free_slowpath;
+
+	if (!total_alloc)
+		return;
+
+	printf("\n");
+	printf("Slab Perf Counter       Alloc     Free %%Al %%Fr\n");
+	printf("--------------------------------------------------\n");
+	printf("Fastpath             %8lu %8lu %3lu %3lu\n",
+		s->alloc_fastpath, s->free_fastpath,
+		s->alloc_fastpath * 100 / total_alloc,
+		s->free_fastpath * 100 / total_free);
+	printf("Slowpath             %8lu %8lu %3lu %3lu\n",
+		total_alloc - s->alloc_fastpath, s->free_slowpath,
+		(total_alloc - s->alloc_fastpath) * 100 / total_alloc,
+		s->free_slowpath * 100 / total_free);
+	printf("Page Alloc           %8lu %8lu %3lu %3lu\n",
+		s->alloc_slab, s->free_slab,
+		s->alloc_slab * 100 / total_alloc,
+		s->free_slab * 100 / total_free);
+	printf("Add partial          %8lu %8lu %3lu %3lu\n",
+		s->deactivate_to_head + s->deactivate_to_tail,
+		s->free_add_partial,
+		(s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
+		s->free_add_partial * 100 / total_free);
+	printf("Remove partial       %8lu %8lu %3lu %3lu\n",
+		s->alloc_from_partial, s->free_remove_partial,
+		s->alloc_from_partial * 100 / total_alloc,
+		s->free_remove_partial * 100 / total_free);
+
+	printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
+		s->deactivate_remote_frees, s->free_frozen,
+		s->deactivate_remote_frees * 100 / total_alloc,
+		s->free_frozen * 100 / total_free);
+
+	printf("Total                %8lu %8lu\n\n", total_alloc, total_free);
+
+	if (s->cpuslab_flush)
+		printf("Flushes %8lu\n", s->cpuslab_flush);
+
+	if (s->alloc_refill)
+		printf("Refill %8lu\n", s->alloc_refill);
+
+	total = s->deactivate_full + s->deactivate_empty +
+			s->deactivate_to_head + s->deactivate_to_tail;
+
+	if (total)
+		printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
+			"ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
+			s->deactivate_full, (s->deactivate_full * 100) / total,
+			s->deactivate_empty, (s->deactivate_empty * 100) / total,
+			s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
+			s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
+}
+
 void report(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
@@ -430,6 +515,7 @@ void report(struct slabinfo *s)
 	ops(s);
 	show_tracking(s);
 	slab_numa(s, 1);
+	slab_stats(s);
 }
 
 void slabcache(struct slabinfo *s)
@@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s)
 		*p++ = 'T';
 
 	*p = 0;
-	printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
-		s->name, s->objects, s->object_size, size_str, dist_str,
-		s->objs_per_slab, s->order,
-		s->slabs ? (s->partial * 100) / s->slabs : 100,
-		s->slabs ? (s->objects * s->object_size * 100) /
-			(s->slabs * (page_size << s->order)) : 100,
-		flags);
+	if (show_activity) {
+		unsigned long total_alloc;
+		unsigned long total_free;
+
+		total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+		total_free = s->free_fastpath + s->free_slowpath;
+
+		printf("%-21s %8ld %8ld %8ld %3ld %3ld \n",
+			s->name, s->objects,
+			total_alloc, total_free,
+			total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
+			total_free ? (s->free_fastpath * 100 / total_free) : 0);
+	}
+	else
+		printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+			s->name, s->objects, s->object_size, size_str, dist_str,
+			s->objs_per_slab, s->order,
+			s->slabs ? (s->partial * 100) / s->slabs : 100,
+			s->slabs ? (s->objects * s->object_size * 100) /
+				(s->slabs * (page_size << s->order)) : 100,
+			flags);
 }
 
 /*
@@ -892,6 +992,8 @@ void sort_slabs(void)
 
 			if (sort_size)
 				result = slab_size(s1) < slab_size(s2);
+			else if (sort_active)
+				result = slab_activity(s1) < slab_activity(s2);
 			else
 				result = strcasecmp(s1->name, s2->name);
 
@@ -1074,6 +1176,23 @@ void read_slab_dir(void)
 			free(t);
 			slab->store_user = get_obj("store_user");
 			slab->trace = get_obj("trace");
+			slab->alloc_fastpath = get_obj("alloc_fastpath");
+			slab->alloc_slowpath = get_obj("alloc_slowpath");
+			slab->free_fastpath = get_obj("free_fastpath");
+			slab->free_slowpath = get_obj("free_slowpath");
+			slab->free_frozen= get_obj("free_frozen");
+			slab->free_add_partial = get_obj("free_add_partial");
+			slab->free_remove_partial = get_obj("free_remove_partial");
+			slab->alloc_from_partial = get_obj("alloc_from_partial");
+			slab->alloc_slab = get_obj("alloc_slab");
+			slab->alloc_refill = get_obj("alloc_refill");
+			slab->free_slab = get_obj("free_slab");
+			slab->cpuslab_flush = get_obj("cpuslab_flush");
+			slab->deactivate_full = get_obj("deactivate_full");
+			slab->deactivate_empty = get_obj("deactivate_empty");
+			slab->deactivate_to_head = get_obj("deactivate_to_head");
+			slab->deactivate_to_tail = get_obj("deactivate_to_tail");
+			slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
 			chdir("..");
 			if (slab->name[0] == ':')
 				alias_targets++;
@@ -1124,7 +1243,9 @@ void output_slabs(void)
 
 struct option opts[] = {
 	{ "aliases", 0, NULL, 'a' },
+	{ "activity", 0, NULL, 'A' },
 	{ "debug", 2, NULL, 'd' },
+	{ "display-activity", 0, NULL, 'D' },
 	{ "empty", 0, NULL, 'e' },
 	{ "first-alias", 0, NULL, 'f' },
 	{ "help", 0, NULL, 'h' },
@@ -1149,7 +1270,7 @@ int main(int argc, char *argv[])
 
 	page_size = getpagesize();
 
-	while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS",
+	while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS",
 						opts, NULL)) != -1)
 		switch (c) {
 		case '1':
@@ -1158,11 +1279,17 @@ int main(int argc, char *argv[])
 		case 'a':
 			show_alias = 1;
 			break;
+		case 'A':
+			sort_active = 1;
+			break;
 		case 'd':
 			set_debug = 1;
 			if (!debug_opt_scan(optarg))
 				fatal("Invalid debug option '%s'\n", optarg);
 			break;
+		case 'D':
+			show_activity = 1;
+			break;
 		case 'e':
 			show_empty = 1;
 			break;
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ddb1a706b144..5e6d3d634d5b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,12 +11,35 @@
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
 
+enum stat_item {
+	ALLOC_FASTPATH,		/* Allocation from cpu slab */
+	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
+	FREE_FASTPATH,		/* Free to cpu slub */
+	FREE_SLOWPATH,		/* Freeing not to cpu slab */
+	FREE_FROZEN,		/* Freeing to frozen slab */
+	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
+	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
+	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from partial list */
+	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
+	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
+	FREE_SLAB,		/* Slab freed to the page allocator */
+	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
+	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
+	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
+	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
+	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
+	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
+	NR_SLUB_STAT_ITEMS };
+
 struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to first free per cpu object */
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
 	unsigned int offset;	/* Freepointer offset (in word units) */
 	unsigned int objsize;	/* Size of an object (from kmem_cache) */
+#ifdef CONFIG_SLUB_STATS
+	unsigned stat[NR_SLUB_STAT_ITEMS];
+#endif
 };
 
 struct kmem_cache_node {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0d385be682db..4f4008fc73e4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -205,6 +205,19 @@ config SLUB_DEBUG_ON
 	  off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
 	  "slub_debug=-".
 
+config SLUB_STATS
+	default n
+	bool "Enable SLUB performance statistics"
+	depends on SLUB
+	help
+	  SLUB statistics are useful to debug SLUBs allocation behavior in
+	  order find ways to optimize the allocator. This should never be
+	  enabled for production use since keeping statistics slows down
+	  the allocator by a few percentage points. The slabinfo command
+	  supports the determination of the most active slabs to figure
+	  out which slabs are relevant to a particular load.
+	  Try running: slabinfo -DA
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)
diff --git a/mm/slub.c b/mm/slub.c
index 20ab8f0a4eb9..ac836d31e3be 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -250,6 +250,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
+
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
@@ -258,8 +259,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
 {
 	kfree(s);
 }
+
 #endif
 
+static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
+{
+#ifdef CONFIG_SLUB_STATS
+	c->stat[si]++;
+#endif
+}
+
 /********************************************************************
  * 			Core slab cache functions
  *******************************************************************/
@@ -1364,17 +1373,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
 
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
-		if (page->freelist != page->end)
+		if (page->freelist != page->end) {
 			add_partial(n, page, tail);
-		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
-			add_full(n, page);
+			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+		} else {
+			stat(c, DEACTIVATE_FULL);
+			if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
+				add_full(n, page);
+		}
 		slab_unlock(page);
-
 	} else {
+		stat(c, DEACTIVATE_EMPTY);
 		if (n->nr_partial < MIN_PARTIAL) {
 			/*
 			 * Adding an empty slab to the partial slabs in order
@@ -1388,6 +1402,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
+			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
 			discard_slab(s, page);
 		}
 	}
@@ -1400,6 +1415,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
 	struct page *page = c->page;
 	int tail = 1;
+
+	if (c->freelist)
+		stat(c, DEACTIVATE_REMOTE_FREES);
 	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
@@ -1429,6 +1447,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
+	stat(c, CPUSLAB_FLUSH);
 	slab_lock(c->page);
 	deactivate_slab(s, c);
 }
@@ -1511,6 +1530,7 @@ static void *__slab_alloc(struct kmem_cache *s,
 	slab_lock(c->page);
 	if (unlikely(!node_match(c, node)))
 		goto another_slab;
+	stat(c, ALLOC_REFILL);
 load_freelist:
 	object = c->page->freelist;
 	if (unlikely(object == c->page->end))
@@ -1525,6 +1545,7 @@ load_freelist:
 	c->node = page_to_nid(c->page);
 unlock_out:
 	slab_unlock(c->page);
+	stat(c, ALLOC_SLOWPATH);
 out:
 #ifdef SLUB_FASTPATH
 	local_irq_restore(flags);
@@ -1538,6 +1559,7 @@ new_slab:
 	new = get_partial(s, gfpflags, node);
 	if (new) {
 		c->page = new;
+		stat(c, ALLOC_FROM_PARTIAL);
 		goto load_freelist;
 	}
 
@@ -1551,6 +1573,7 @@ new_slab:
 
 	if (new) {
 		c = get_cpu_slab(s, smp_processor_id());
+		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
 		slab_lock(new);
@@ -1610,6 +1633,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 			object = __slab_alloc(s, gfpflags, node, addr, c);
 			break;
 		}
+		stat(c, ALLOC_FASTPATH);
 	} while (cmpxchg_local(&c->freelist, object, object[c->offset])
 								!= object);
 #else
@@ -1624,6 +1648,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	else {
 		object = c->freelist;
 		c->freelist = object[c->offset];
+		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
 #endif
@@ -1661,12 +1686,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
 	void *prior;
 	void **object = (void *)x;
+	struct kmem_cache_cpu *c;
 
 #ifdef SLUB_FASTPATH
 	unsigned long flags;
 
 	local_irq_save(flags);
 #endif
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
 	if (unlikely(SlabDebug(page)))
@@ -1676,8 +1704,10 @@ checks_ok:
 	page->freelist = object;
 	page->inuse--;
 
-	if (unlikely(SlabFrozen(page)))
+	if (unlikely(SlabFrozen(page))) {
+		stat(c, FREE_FROZEN);
 		goto out_unlock;
+	}
 
 	if (unlikely(!page->inuse))
 		goto slab_empty;
@@ -1687,8 +1717,10 @@ checks_ok:
 	 * was not on the partial list before
 	 * then add it.
 	 */
-	if (unlikely(prior == page->end))
+	if (unlikely(prior == page->end)) {
 		add_partial(get_node(s, page_to_nid(page)), page, 1);
+		stat(c, FREE_ADD_PARTIAL);
+	}
 
 out_unlock:
 	slab_unlock(page);
@@ -1698,13 +1730,15 @@ out_unlock:
 	return;
 
 slab_empty:
-	if (prior != page->end)
+	if (prior != page->end) {
 		/*
 		 * Slab still on the partial list.
 		 */
 		remove_partial(s, page);
-
+		stat(c, FREE_REMOVE_PARTIAL);
+	}
 	slab_unlock(page);
+	stat(c, FREE_SLAB);
 #ifdef SLUB_FASTPATH
 	local_irq_restore(flags);
 #endif
@@ -1758,6 +1792,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 			break;
 		}
 		object[c->offset] = freelist;
+		stat(c, FREE_FASTPATH);
 	} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
 #else
 	unsigned long flags;
@@ -1768,6 +1803,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 	if (likely(page == c->page && c->node >= 0)) {
 		object[c->offset] = c->freelist;
 		c->freelist = object;
+		stat(c, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, x, addr, c->offset);
 
@@ -3980,6 +4016,62 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 SLAB_ATTR(remote_node_defrag_ratio);
 #endif
 
+#ifdef CONFIG_SLUB_STATS
+
+static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
+{
+	unsigned long sum  = 0;
+	int cpu;
+	int len;
+	int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
+
+	if (!data)
+		return -ENOMEM;
+
+	for_each_online_cpu(cpu) {
+		unsigned x = get_cpu_slab(s, cpu)->stat[si];
+
+		data[cpu] = x;
+		sum += x;
+	}
+
+	len = sprintf(buf, "%lu", sum);
+
+	for_each_online_cpu(cpu) {
+		if (data[cpu] && len < PAGE_SIZE - 20)
+			len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
+	}
+	kfree(data);
+	return len + sprintf(buf + len, "\n");
+}
+
+#define STAT_ATTR(si, text) 					\
+static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
+{								\
+	return show_stat(s, buf, si);				\
+}								\
+SLAB_ATTR_RO(text);						\
+
+STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
+STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
+STAT_ATTR(FREE_FASTPATH, free_fastpath);
+STAT_ATTR(FREE_SLOWPATH, free_slowpath);
+STAT_ATTR(FREE_FROZEN, free_frozen);
+STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
+STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
+STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
+STAT_ATTR(ALLOC_SLAB, alloc_slab);
+STAT_ATTR(ALLOC_REFILL, alloc_refill);
+STAT_ATTR(FREE_SLAB, free_slab);
+STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
+STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
+STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
+STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
+STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
+STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
+
+#endif
+
 static struct attribute *slab_attrs[] = {
 	&slab_size_attr.attr,
 	&object_size_attr.attr,
@@ -4009,6 +4101,25 @@ static struct attribute *slab_attrs[] = {
 #endif
 #ifdef CONFIG_NUMA
 	&remote_node_defrag_ratio_attr.attr,
+#endif
+#ifdef CONFIG_SLUB_STATS
+	&alloc_fastpath_attr.attr,
+	&alloc_slowpath_attr.attr,
+	&free_fastpath_attr.attr,
+	&free_slowpath_attr.attr,
+	&free_frozen_attr.attr,
+	&free_add_partial_attr.attr,
+	&free_remove_partial_attr.attr,
+	&alloc_from_partial_attr.attr,
+	&alloc_slab_attr.attr,
+	&alloc_refill_attr.attr,
+	&free_slab_attr.attr,
+	&cpuslab_flush_attr.attr,
+	&deactivate_full_attr.attr,
+	&deactivate_empty_attr.attr,
+	&deactivate_to_head_attr.attr,
+	&deactivate_to_tail_attr.attr,
+	&deactivate_remote_frees_attr.attr,
 #endif
 	NULL
 };
-- 
cgit 


From 76c072b48e39e9291fbf02d6c912cf27d65e093d Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Fri, 8 Feb 2008 02:09:56 +0000
Subject: dm ioctl: move compat code

Move compat_ioctl handling into dm-ioctl.c.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c    | 15 +++++++++++++--
 fs/compat_ioctl.c        | 34 ----------------------------------
 include/linux/dm-ioctl.h | 34 ++--------------------------------
 3 files changed, 15 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 4aa1f78b78f0..9c491397a51d 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/dm-ioctl.h>
 #include <linux/hdreg.h>
+#include <linux/compat.h>
 
 #include <asm/uaccess.h>
 
@@ -1350,10 +1351,10 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
 {
 	struct dm_ioctl tmp, *dmi;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)))
+	if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data)))
 		return -EFAULT;
 
-	if (tmp.data_size < sizeof(tmp))
+	if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data)))
 		return -EINVAL;
 
 	dmi = vmalloc(tmp.data_size);
@@ -1474,8 +1475,18 @@ static long dm_ctl_ioctl(struct file *file, uint command, ulong u)
 	return (long)ctl_ioctl(command, (struct dm_ioctl __user *)u);
 }
 
+#ifdef CONFIG_COMPAT
+static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u)
+{
+	return (long)dm_ctl_ioctl(file, command, (ulong) compat_ptr(u));
+}
+#else
+#define dm_compat_ctl_ioctl NULL
+#endif
+
 static const struct file_operations _ctl_fops = {
 	.unlocked_ioctl	 = dm_ctl_ioctl,
+	.compat_ioctl = dm_compat_ctl_ioctl,
 	.owner	 = THIS_MODULE,
 };
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 614bd75b5a4a..ee32c0eac7c1 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -78,7 +78,6 @@
 #include <linux/mii.h>
 #include <linux/if_bonding.h>
 #include <linux/watchdog.h>
-#include <linux/dm-ioctl.h>
 
 #include <linux/soundcard.h>
 #include <linux/lp.h>
@@ -1993,39 +1992,6 @@ COMPATIBLE_IOCTL(STOP_ARRAY_RO)
 COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
 COMPATIBLE_IOCTL(GET_BITMAP_FILE)
 ULONG_IOCTL(SET_BITMAP_FILE)
-/* DM */
-COMPATIBLE_IOCTL(DM_VERSION_32)
-COMPATIBLE_IOCTL(DM_REMOVE_ALL_32)
-COMPATIBLE_IOCTL(DM_LIST_DEVICES_32)
-COMPATIBLE_IOCTL(DM_DEV_CREATE_32)
-COMPATIBLE_IOCTL(DM_DEV_REMOVE_32)
-COMPATIBLE_IOCTL(DM_DEV_RENAME_32)
-COMPATIBLE_IOCTL(DM_DEV_SUSPEND_32)
-COMPATIBLE_IOCTL(DM_DEV_STATUS_32)
-COMPATIBLE_IOCTL(DM_DEV_WAIT_32)
-COMPATIBLE_IOCTL(DM_TABLE_LOAD_32)
-COMPATIBLE_IOCTL(DM_TABLE_CLEAR_32)
-COMPATIBLE_IOCTL(DM_TABLE_DEPS_32)
-COMPATIBLE_IOCTL(DM_TABLE_STATUS_32)
-COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32)
-COMPATIBLE_IOCTL(DM_TARGET_MSG_32)
-COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY_32)
-COMPATIBLE_IOCTL(DM_VERSION)
-COMPATIBLE_IOCTL(DM_REMOVE_ALL)
-COMPATIBLE_IOCTL(DM_LIST_DEVICES)
-COMPATIBLE_IOCTL(DM_DEV_CREATE)
-COMPATIBLE_IOCTL(DM_DEV_REMOVE)
-COMPATIBLE_IOCTL(DM_DEV_RENAME)
-COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
-COMPATIBLE_IOCTL(DM_DEV_STATUS)
-COMPATIBLE_IOCTL(DM_DEV_WAIT)
-COMPATIBLE_IOCTL(DM_TABLE_LOAD)
-COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
-COMPATIBLE_IOCTL(DM_TABLE_DEPS)
-COMPATIBLE_IOCTL(DM_TABLE_STATUS)
-COMPATIBLE_IOCTL(DM_LIST_VERSIONS)
-COMPATIBLE_IOCTL(DM_TARGET_MSG)
-COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY)
 /* Big K */
 COMPATIBLE_IOCTL(PIO_FONT)
 COMPATIBLE_IOCTL(GIO_FONT)
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 523281c5b7f5..b03c41bbfa14 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -232,36 +232,6 @@ enum {
 	DM_DEV_SET_GEOMETRY_CMD
 };
 
-/*
- * The dm_ioctl struct passed into the ioctl is just the header
- * on a larger chunk of memory.  On x86-64 and other
- * architectures the dm-ioctl struct will be padded to an 8 byte
- * boundary so the size will be different, which would change the
- * ioctl code - yes I really messed up.  This hack forces these
- * architectures to have the correct ioctl code.
- */
-#ifdef CONFIG_COMPAT
-typedef char ioctl_struct[308];
-#define DM_VERSION_32       _IOWR(DM_IOCTL, DM_VERSION_CMD, ioctl_struct)
-#define DM_REMOVE_ALL_32    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, ioctl_struct)
-#define DM_LIST_DEVICES_32  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, ioctl_struct)
-
-#define DM_DEV_CREATE_32    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, ioctl_struct)
-#define DM_DEV_REMOVE_32    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, ioctl_struct)
-#define DM_DEV_RENAME_32    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, ioctl_struct)
-#define DM_DEV_SUSPEND_32   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, ioctl_struct)
-#define DM_DEV_STATUS_32    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, ioctl_struct)
-#define DM_DEV_WAIT_32      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, ioctl_struct)
-
-#define DM_TABLE_LOAD_32    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, ioctl_struct)
-#define DM_TABLE_CLEAR_32   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, ioctl_struct)
-#define DM_TABLE_DEPS_32    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, ioctl_struct)
-#define DM_TABLE_STATUS_32  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, ioctl_struct)
-#define DM_LIST_VERSIONS_32 _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, ioctl_struct)
-#define DM_TARGET_MSG_32    _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, ioctl_struct)
-#define DM_DEV_SET_GEOMETRY_32	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, ioctl_struct)
-#endif
-
 #define DM_IOCTL 0xfd
 
 #define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
@@ -286,9 +256,9 @@ typedef char ioctl_struct[308];
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	12
+#define DM_VERSION_MINOR	13
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2007-10-02)"
+#define DM_VERSION_EXTRA	"-ioctl (2007-10-18)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit 


From 4f41b09f86e0e3b48194b2ad0356391bf6d47e40 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Fri, 8 Feb 2008 02:10:01 +0000
Subject: dm: table remove unused variable

Save some bytes.

Signed-off-by: Vasily Averin <vvs@sw.ru>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e765e191663d..cb784579956b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -110,15 +110,15 @@ struct target_type {
 };
 
 struct io_restrictions {
-	unsigned int		max_sectors;
-	unsigned short		max_phys_segments;
-	unsigned short		max_hw_segments;
-	unsigned short		hardsect_size;
-	unsigned int		max_segment_size;
-	unsigned int		max_hw_sectors;
-	unsigned long		seg_boundary_mask;
-	unsigned long		bounce_pfn;
-	unsigned char		no_cluster; /* inverted so that 0 is default */
+	unsigned long bounce_pfn;
+	unsigned long seg_boundary_mask;
+	unsigned max_hw_sectors;
+	unsigned max_sectors;
+	unsigned max_segment_size;
+	unsigned short hardsect_size;
+	unsigned short max_hw_segments;
+	unsigned short max_phys_segments;
+	unsigned char no_cluster; /* inverted so that 0 is default */
 };
 
 struct dm_target {
-- 
cgit 


From 405137d16fbe4c80b9e06e61af05856027745d23 Mon Sep 17 00:00:00 2001
From: Joy Latten <latten@austin.ibm.com>
Date: Thu, 7 Feb 2008 23:11:56 -0800
Subject: [IPSEC]: Add support for aes-ctr.

The below patch allows IPsec to use CTR mode with AES encryption
algorithm. Tested this using setkey in ipsec-tools.

Signed-off-by: Joy Latten <latten@austin.ibm.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pfkeyv2.h |  1 +
 net/xfrm/xfrm_algo.c    | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index 6db69ff5d83e..700725ddcaae 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -298,6 +298,7 @@ struct sadb_x_sec_ctx {
 #define SADB_X_EALG_BLOWFISHCBC		7
 #define SADB_EALG_NULL			11
 #define SADB_X_EALG_AESCBC		12
+#define SADB_X_EALG_AESCTR		13
 #define SADB_X_EALG_AES_CCM_ICV8	14
 #define SADB_X_EALG_AES_CCM_ICV12	15
 #define SADB_X_EALG_AES_CCM_ICV16	16
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6cc15250de69..8aa6440d689f 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -399,6 +399,23 @@ static struct xfrm_algo_desc ealg_list[] = {
 		.sadb_alg_maxbits = 256
 	}
 },
+{
+	.name = "rfc3686(ctr(aes))",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 128,
+			.defkeybits = 160, /* 128-bit key + 32-bit nonce */
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_AESCTR,
+		.sadb_alg_ivlen	= 8,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 256
+	}
+},
 };
 
 static struct xfrm_algo_desc calg_list[] = {
-- 
cgit 


From 63a7138671c50a6f2c27bbd1a308dc75967062a3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 8 Feb 2008 12:41:03 +0100
Subject: block: fixup rq_init() a bit

Rearrange fields in cache order and initialize some fields that
we didn't previously init. Remove init of ->completion_data, it's
part of a union with ->hash. Luckily clearing the rb node is the same
as setting it to null!

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 23 +++++++++++++++++------
 include/linux/blkdev.h |  4 +++-
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 4afb39c82339..fba4ca7c6086 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -102,27 +102,38 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 
+/*
+ * We can't just memset() the structure, since the allocation path
+ * already stored some information in the request.
+ */
 void rq_init(struct request_queue *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
-
-	rq->errors = 0;
+	rq->q = q;
+	rq->sector = rq->hard_sector = (sector_t) -1;
+	rq->nr_sectors = rq->hard_nr_sectors = 0;
+	rq->current_nr_sectors = rq->hard_cur_sectors = 0;
 	rq->bio = rq->biotail = NULL;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
+	rq->rq_disk = NULL;
+	rq->nr_phys_segments = 0;
+	rq->nr_hw_segments = 0;
 	rq->ioprio = 0;
+	rq->special = NULL;
 	rq->buffer = NULL;
+	rq->tag = -1;
+	rq->errors = 0;
 	rq->ref_count = 1;
-	rq->q = q;
-	rq->special = NULL;
+	rq->cmd_len = 0;
+	memset(rq->cmd, 0, sizeof(rq->cmd));
 	rq->data_len = 0;
+	rq->sense_len = 0;
 	rq->data = NULL;
-	rq->nr_phys_segments = 0;
 	rq->sense = NULL;
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
-	rq->completion_data = NULL;
 	rq->next_rq = NULL;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 90392a9d7a9c..e1888cc5b8ae 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -137,7 +137,9 @@ enum rq_flag_bits {
 #define BLK_MAX_CDB	16
 
 /*
- * try to put the fields that are referenced together in the same cacheline
+ * try to put the fields that are referenced together in the same cacheline.
+ * if you modify this structure, be sure to check block/blk-core.c:rq_init()
+ * as well!
  */
 struct request {
 	struct list_head queuelist;
-- 
cgit 


From ea5c48ab2a76559d4af39e1f7de137c0851ac0a5 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Fri, 8 Feb 2008 11:04:09 +0100
Subject: Enhanced partition statistics: core statistics

This patch contain the core infrastructure of enhanced partition
statistics. It adds to struct hd_struct the same stats data as struct
gendisk and define basics function to manipulate them.

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/genhd.h | 151 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 141 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 1dbea0ac5693..589830aca99d 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -91,6 +91,15 @@ struct partition {
 	__le32 nr_sects;		/* nr of sectors in partition */
 } __attribute__((packed));
 
+struct disk_stats {
+	unsigned long sectors[2];	/* READs and WRITEs */
+	unsigned long ios[2];
+	unsigned long merges[2];
+	unsigned long ticks[2];
+	unsigned long io_ticks;
+	unsigned long time_in_queue;
+};
+	
 struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
@@ -100,6 +109,13 @@ struct hd_struct {
 	int policy, partno;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	int make_it_fail;
+#endif
+	unsigned long stamp;
+	int in_flight;
+#ifdef	CONFIG_SMP
+	struct disk_stats *dkstats;
+#else
+	struct disk_stats dkstats;
 #endif
 };
 
@@ -111,15 +127,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_FAIL				64
 
-struct disk_stats {
-	unsigned long sectors[2];	/* READs and WRITEs */
-	unsigned long ios[2];
-	unsigned long merges[2];
-	unsigned long ticks[2];
-	unsigned long io_ticks;
-	unsigned long time_in_queue;
-};
-	
+
 struct gendisk {
 	int major;			/* major number of driver */
 	int first_minor;
@@ -158,6 +166,20 @@ struct gendisk {
  * The __ variants should only be called in critical sections. The full
  * variants disable/enable preemption.
  */
+static inline struct hd_struct *get_part(struct gendisk *gendiskp,
+					 sector_t sector)
+{
+	struct hd_struct *part;
+	int i;
+	for (i = 0; i < gendiskp->minors - 1; i++) {
+		part = gendiskp->part[i];
+		if (part && part->start_sect <= sector
+		    && sector < part->start_sect + part->nr_sects)
+			return part;
+	}
+	return NULL;
+}
+
 #ifdef	CONFIG_SMP
 #define __disk_stat_add(gendiskp, field, addnd) 	\
 	(per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd)
@@ -177,15 +199,62 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
 		memset(per_cpu_ptr(gendiskp->dkstats, i), value,
 				sizeof (struct disk_stats));
 }		
+
+#define __part_stat_add(part, field, addnd)				\
+	(per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector)		\
+({								\
+	struct hd_struct *part = get_part(gendiskp, sector);	\
+	if (part)						\
+		__part_stat_add(part, field, addnd);		\
+	__disk_stat_add(gendiskp, field, addnd);		\
+})
+
+#define part_stat_read(part, field)					\
+({									\
+	typeof(part->dkstats->field) res = 0;				\
+	int i;								\
+	for_each_possible_cpu(i)					\
+		res += per_cpu_ptr(part->dkstats, i)->field;		\
+	res;								\
+})
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)	{
+	int i;
+	for_each_possible_cpu(i)
+		memset(per_cpu_ptr(part->dkstats, i), value,
+		       sizeof(struct disk_stats));
+}
 				
 #else
 #define __disk_stat_add(gendiskp, field, addnd) \
 				(gendiskp->dkstats.field += addnd)
 #define disk_stat_read(gendiskp, field)	(gendiskp->dkstats.field)
 
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
+static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
+{
 	memset(&gendiskp->dkstats, value, sizeof (struct disk_stats));
 }
+
+#define __part_stat_add(part, field, addnd) \
+	(part->dkstats.field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector)		\
+({								\
+	struct hd_struct *part = get_part(gendiskp, sector);	\
+	if (part)						\
+		part->dkstats.field += addnd;			\
+	__disk_stat_add(gendiskp, field, addnd);		\
+})
+
+#define part_stat_read(part, field)	(part->dkstats.field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+	memset(&part->dkstats, value, sizeof(struct disk_stats));
+}
+
 #endif
 
 #define disk_stat_add(gendiskp, field, addnd)			\
@@ -206,6 +275,45 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
 #define disk_stat_sub(gendiskp, field, subnd) \
 		disk_stat_add(gendiskp, field, -subnd)
 
+#define part_stat_add(gendiskp, field, addnd)		\
+	do {						\
+		preempt_disable();			\
+		__part_stat_add(gendiskp, field, addnd);\
+		preempt_enable();			\
+	} while (0)
+
+#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1)
+#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1)
+
+#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1)
+#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1)
+
+#define __part_stat_sub(gendiskp, field, subnd) \
+		__part_stat_add(gendiskp, field, -subnd)
+#define part_stat_sub(gendiskp, field, subnd) \
+		part_stat_add(gendiskp, field, -subnd)
+
+#define all_stat_add(gendiskp, field, addnd, sector)		\
+	do {							\
+		preempt_disable();				\
+		__all_stat_add(gendiskp, field, addnd, sector);	\
+		preempt_enable();				\
+	} while (0)
+
+#define __all_stat_dec(gendiskp, field, sector) \
+		__all_stat_add(gendiskp, field, -1, sector)
+#define all_stat_dec(gendiskp, field, sector) \
+		all_stat_add(gendiskp, field, -1, sector)
+
+#define __all_stat_inc(gendiskp, field, sector) \
+		__all_stat_add(gendiskp, field, 1, sector)
+#define all_stat_inc(gendiskp, field, sector) \
+		all_stat_add(gendiskp, field, 1, sector)
+
+#define __all_stat_sub(gendiskp, field, subnd, sector) \
+		__all_stat_add(gendiskp, field, -subnd, sector)
+#define all_stat_sub(gendiskp, field, subnd, sector) \
+		all_stat_add(gendiskp, field, -subnd, sector)
 
 /* Inlines to alloc and free disk stats in struct gendisk */
 #ifdef  CONFIG_SMP
@@ -221,6 +329,20 @@ static inline void free_disk_stats(struct gendisk *disk)
 {
 	free_percpu(disk->dkstats);
 }
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+	part->dkstats = alloc_percpu(struct disk_stats);
+	if (!part->dkstats)
+		return 0;
+	return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+	free_percpu(part->dkstats);
+}
+
 #else	/* CONFIG_SMP */
 static inline int init_disk_stats(struct gendisk *disk)
 {
@@ -230,6 +352,15 @@ static inline int init_disk_stats(struct gendisk *disk)
 static inline void free_disk_stats(struct gendisk *disk)
 {
 }
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+	return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+}
 #endif	/* CONFIG_SMP */
 
 /* drivers/block/ll_rw_blk.c */
-- 
cgit 


From 6f2576af5ba5913538fda7dfb7c6a17771025477 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Fri, 8 Feb 2008 11:04:35 +0100
Subject: Enhanced partition statistics: update partition statitics

Updates the enhanced partition statistics in generic block layer
besides the disk statistics.

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c      | 34 ++++++++++++++++++++++++++++++----
 block/blk-merge.c     |  6 ++++++
 fs/partitions/check.c |  7 +++++++
 include/linux/genhd.h |  1 +
 4 files changed, 44 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index fba4ca7c6086..2358fc5de5a4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -60,10 +60,15 @@ static void drive_stat_acct(struct request *rq, int new_io)
 		return;
 
 	if (!new_io) {
-		__disk_stat_inc(rq->rq_disk, merges[rw]);
+		__all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
 	} else {
+		struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
 		disk_round_stats(rq->rq_disk);
 		rq->rq_disk->in_flight++;
+		if (part) {
+			part_round_stats(part);
+			part->in_flight++;
+		}
 	}
 }
 
@@ -997,6 +1002,21 @@ void disk_round_stats(struct gendisk *disk)
 }
 EXPORT_SYMBOL_GPL(disk_round_stats);
 
+void part_round_stats(struct hd_struct *part)
+{
+	unsigned long now = jiffies;
+
+	if (now == part->stamp)
+		return;
+
+	if (part->in_flight) {
+		__part_stat_add(part, time_in_queue,
+				part->in_flight * (now - part->stamp));
+		__part_stat_add(part, io_ticks, (now - part->stamp));
+	}
+	part->stamp = now;
+}
+
 /*
  * queue lock must be held
  */
@@ -1530,7 +1550,8 @@ static int __end_that_request_first(struct request *req, int error,
 	if (blk_fs_request(req) && req->rq_disk) {
 		const int rw = rq_data_dir(req);
 
-		disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
+		all_stat_add(req->rq_disk, sectors[rw],
+			     nr_bytes >> 9, req->sector);
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1715,11 +1736,16 @@ static void end_that_request_last(struct request *req, int error)
 	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
+		struct hd_struct *part = get_part(disk, req->sector);
 
-		__disk_stat_inc(disk, ios[rw]);
-		__disk_stat_add(disk, ticks[rw], duration);
+		__all_stat_inc(disk, ios[rw], req->sector);
+		__all_stat_add(disk, ticks[rw], duration, req->sector);
 		disk_round_stats(disk);
 		disk->in_flight--;
+		if (part) {
+			part_round_stats(part);
+			part->in_flight--;
+		}
 	}
 
 	if (req->end_io)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 845ef8131108..d3b84bbb776a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -454,8 +454,14 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	elv_merge_requests(q, req, next);
 
 	if (req->rq_disk) {
+		struct hd_struct *part
+			= get_part(req->rq_disk, req->sector);
 		disk_round_stats(req->rq_disk);
 		req->rq_disk->in_flight--;
+		if (part) {
+			part_round_stats(part);
+			part->in_flight--;
+		}
 	}
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a64045ff845..f2ec7f1b0ec5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/kmod.h>
 #include <linux/ctype.h>
+#include <linux/genhd.h>
 
 #include "check.h"
 
@@ -273,6 +274,7 @@ static struct attribute_group *part_attr_groups[] = {
 static void part_release(struct device *dev)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	free_part_stats(p);
 	kfree(p);
 }
 
@@ -314,6 +316,7 @@ void delete_partition(struct gendisk *disk, int part)
 	p->nr_sects = 0;
 	p->ios[0] = p->ios[1] = 0;
 	p->sectors[0] = p->sectors[1] = 0;
+	part_stat_set_all(p, 0);
 	kobject_put(p->holder_dir);
 	device_del(&p->dev);
 	put_device(&p->dev);
@@ -336,6 +339,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
 	if (!p)
 		return;
 
+	if (!init_part_stats(p)) {
+		kfree(p);
+		return;
+	}
 	p->start_sect = start;
 	p->nr_sects = len;
 	p->partno = part;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 589830aca99d..4cf25a5f4159 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -365,6 +365,7 @@ static inline void free_part_stats(struct hd_struct *part)
 
 /* drivers/block/ll_rw_blk.c */
 extern void disk_round_stats(struct gendisk *disk);
+extern void part_round_stats(struct hd_struct *part);
 
 /* drivers/block/genhd.c */
 extern int get_blkdev_list(char *, int);
-- 
cgit 


From c3c930d93365c495fbc1df28649da7cd4b97f4af Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Fri, 8 Feb 2008 12:06:21 +0100
Subject: Enhanced partition statistics: remove old partition statistics

Removes the now unused old partition statistic code.

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c      | 4 ----
 fs/partitions/check.c | 2 --
 include/linux/genhd.h | 1 -
 3 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 2358fc5de5a4..e9754dc98ec4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1219,10 +1219,6 @@ static inline void blk_partition_remap(struct bio *bio)
 
 	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
-		const int rw = bio_data_dir(bio);
-
-		p->sectors[rw] += bio_sectors(bio);
-		p->ios[rw]++;
 
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 950bdb4b8f53..03f808c5b79d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -330,8 +330,6 @@ void delete_partition(struct gendisk *disk, int part)
 	disk->part[part-1] = NULL;
 	p->start_sect = 0;
 	p->nr_sects = 0;
-	p->ios[0] = p->ios[1] = 0;
-	p->sectors[0] = p->sectors[1] = 0;
 	part_stat_set_all(p, 0);
 	kobject_put(p->holder_dir);
 	device_del(&p->dev);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4cf25a5f4159..09a3b18918c7 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -105,7 +105,6 @@ struct hd_struct {
 	sector_t nr_sects;
 	struct device dev;
 	struct kobject *holder_dir;
-	unsigned ios[2], sectors[2];	/* READs and WRITEs */
 	int policy, partno;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	int make_it_fail;
-- 
cgit 


From a3d0c6aa1bb342b9b2c7b123b52ac2f48a4d4d0a Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Fri, 8 Feb 2008 04:18:18 -0800
Subject: hugetlb: add locking for overcommit sysctl

When I replaced hugetlb_dynamic_pool with nr_overcommit_hugepages I used
proc_doulongvec_minmax() directly.  However, hugetlb.c's locking rules
require that all counter modifications occur under the hugetlb_lock.  Add a
callback into the hugetlb code similar to the one for nr_hugepages.  Grab
the lock around the manipulation of nr_overcommit_hugepages in
proc_doulongvec_minmax().

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  1 +
 kernel/sysctl.c         |  2 +-
 mm/hugetlb.c            | 10 ++++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 30d606afcafe..7ca198b379af 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -17,6 +17,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 }
 
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8c98d8147d88..9dadc9d88a03 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -982,7 +982,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &nr_overcommit_huge_pages,
 		.maxlen		= sizeof(nr_overcommit_huge_pages),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_minmax,
+		.proc_handler	= &hugetlb_overcommit_handler,
 	},
 #endif
 	{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1a5642074e34..d9a380312467 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -605,6 +605,16 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
+int hugetlb_overcommit_handler(struct ctl_table *table, int write,
+			struct file *file, void __user *buffer,
+			size_t *length, loff_t *ppos)
+{
+	spin_lock(&hugetlb_lock);
+	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+	spin_unlock(&hugetlb_lock);
+	return 0;
+}
+
 #endif /* CONFIG_SYSCTL */
 
 int hugetlb_report_meminfo(char *buf)
-- 
cgit 


From 58bfdd6deeec02b73691ea2c951a3c5d743bca63 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 8 Feb 2008 04:18:21 -0800
Subject: namespaces: move the UTS namespace under UTS_NS option

Currently all the namespace management code is in the kernel/utsname.c file,
so just compile it out and make stubs in the appropriate header.

The init namespace itself is in init/version.c and is in the kernel all the
time.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/utsname.h | 21 +++++++++++++++++++++
 init/Kconfig            |  7 +++++++
 kernel/Makefile         |  3 ++-
 3 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 923db99175f2..11232676bfff 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -35,6 +35,7 @@ struct new_utsname {
 #include <linux/sched.h>
 #include <linux/kref.h>
 #include <linux/nsproxy.h>
+#include <linux/err.h>
 #include <asm/atomic.h>
 
 struct uts_namespace {
@@ -43,6 +44,7 @@ struct uts_namespace {
 };
 extern struct uts_namespace init_uts_ns;
 
+#ifdef CONFIG_UTS_NS
 static inline void get_uts_ns(struct uts_namespace *ns)
 {
 	kref_get(&ns->kref);
@@ -56,6 +58,25 @@ static inline void put_uts_ns(struct uts_namespace *ns)
 {
 	kref_put(&ns->kref, free_uts_ns);
 }
+#else
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+}
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+}
+
+static inline struct uts_namespace *copy_utsname(unsigned long flags,
+					struct uts_namespace *ns)
+{
+	if (flags & CLONE_NEWUTS)
+		return ERR_PTR(-EINVAL);
+
+	return ns;
+}
+#endif
+
 static inline struct new_utsname *utsname(void)
 {
 	return &current->nsproxy->uts_ns->name;
diff --git a/init/Kconfig b/init/Kconfig
index 7654207961a2..fef641af78c2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -429,6 +429,13 @@ config NAMESPACES
 	  or same user id or pid may refer to different tasks when used in
 	  different namespaces.
 
+config UTS_NS
+	bool "UTS namespace"
+	depends on NAMESPACES
+	help
+	  In this namespace tasks see different info provided with the
+	  uname() system call
+
 config BLK_DEV_INITRD
 	bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
 	depends on BROKEN || !FRV
diff --git a/kernel/Makefile b/kernel/Makefile
index 685697c0a181..0f15bd409367 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o \
-	    utsname.o notifier.o ksysfs.o pm_qos_params.o
+	    notifier.o ksysfs.o pm_qos_params.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -33,6 +33,7 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
+obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
-- 
cgit 


From ae5e1b22f17983da929a0d0178896269e19da186 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 8 Feb 2008 04:18:22 -0800
Subject: namespaces: move the IPC namespace under IPC_NS option

Currently the IPC namespace management code is spread over the ipc/*.c files.
I moved this code into ipc/namespace.c file which is compiled out when needed.

The linux/ipc_namespace.h file is used to store the prototypes of the
functions in namespace.c and the stubs for NAMESPACES=n case.  This is done
so, because the stub for copy_ipc_namespace requires the knowledge of the
CLONE_NEWIPC flag, which is in sched.h.  But the linux/ipc.h file itself in
included into many many .c files via the sys.h->sem.h sequence so adding the
sched.h into it will make all these .c depend on sched.h which is not that
good.  On the other hand the knowledge about the namespaces stuff is required
in 4 .c files only.

Besides, this patch compiles out some auxiliary functions from ipc/sem.c,
msg.c and shm.c files.  It turned out that moving these functions into
namespaces.c is not that easy because they use many other calls and macros
from the original file.  Moving them would make this patch complicated.  On
the other hand all these functions can be consolidated, so I will send a
separate patch doing this a bit later.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc.h           | 52 ------------------------------
 include/linux/ipc_namespace.h | 69 ++++++++++++++++++++++++++++++++++++++++
 init/Kconfig                  |  7 +++++
 ipc/Makefile                  |  1 +
 ipc/ipc_sysctl.c              |  1 +
 ipc/msg.c                     |  3 ++
 ipc/namespace.c               | 73 +++++++++++++++++++++++++++++++++++++++++++
 ipc/sem.c                     |  3 ++
 ipc/shm.c                     |  3 ++
 ipc/util.c                    | 61 +-----------------------------------
 ipc/util.h                    |  2 ++
 kernel/nsproxy.c              |  1 +
 12 files changed, 164 insertions(+), 112 deletions(-)
 create mode 100644 include/linux/ipc_namespace.h
 create mode 100644 ipc/namespace.c

(limited to 'include/linux')

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 408696ea5189..b8826107b518 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -100,58 +100,6 @@ struct kern_ipc_perm
 	void		*security;
 };
 
-struct ipc_ids;
-struct ipc_namespace {
-	struct kref	kref;
-	struct ipc_ids	*ids[3];
-
-	int		sem_ctls[4];
-	int		used_sems;
-
-	int		msg_ctlmax;
-	int		msg_ctlmnb;
-	int		msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
-
-	size_t		shm_ctlmax;
-	size_t		shm_ctlall;
-	int		shm_ctlmni;
-	int		shm_tot;
-};
-
-extern struct ipc_namespace init_ipc_ns;
-
-#ifdef CONFIG_SYSVIPC
-#define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
-extern void free_ipc_ns(struct kref *kref);
-extern struct ipc_namespace *copy_ipcs(unsigned long flags,
-						struct ipc_namespace *ns);
-#else
-#define INIT_IPC_NS(ns)
-static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
-						struct ipc_namespace *ns)
-{
-	return ns;
-}
-#endif
-
-static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
-{
-#ifdef CONFIG_SYSVIPC
-	if (ns)
-		kref_get(&ns->kref);
-#endif
-	return ns;
-}
-
-static inline void put_ipc_ns(struct ipc_namespace *ns)
-{
-#ifdef CONFIG_SYSVIPC
-	kref_put(&ns->kref, free_ipc_ns);
-#endif
-}
-
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_IPC_H */
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
new file mode 100644
index 000000000000..a491fc9dd231
--- /dev/null
+++ b/include/linux/ipc_namespace.h
@@ -0,0 +1,69 @@
+#ifndef __IPC_NAMESPACE_H__
+#define __IPC_NAMESPACE_H__
+
+#include <linux/err.h>
+
+struct ipc_ids;
+struct ipc_namespace {
+	struct kref	kref;
+	struct ipc_ids	*ids[3];
+
+	int		sem_ctls[4];
+	int		used_sems;
+
+	int		msg_ctlmax;
+	int		msg_ctlmnb;
+	int		msg_ctlmni;
+	atomic_t	msg_bytes;
+	atomic_t	msg_hdrs;
+
+	size_t		shm_ctlmax;
+	size_t		shm_ctlall;
+	int		shm_ctlmni;
+	int		shm_tot;
+};
+
+extern struct ipc_namespace init_ipc_ns;
+
+#ifdef CONFIG_SYSVIPC
+#define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
+#else
+#define INIT_IPC_NS(ns)
+#endif
+
+#if defined(CONFIG_SYSVIPC) && defined(CONFIG_IPC_NS)
+extern void free_ipc_ns(struct kref *kref);
+extern struct ipc_namespace *copy_ipcs(unsigned long flags,
+						struct ipc_namespace *ns);
+
+static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+{
+	if (ns)
+		kref_get(&ns->kref);
+	return ns;
+}
+
+static inline void put_ipc_ns(struct ipc_namespace *ns)
+{
+	kref_put(&ns->kref, free_ipc_ns);
+}
+#else
+static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
+		struct ipc_namespace *ns)
+{
+	if (flags & CLONE_NEWIPC)
+		return ERR_PTR(-EINVAL);
+
+	return ns;
+}
+
+static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+{
+	return ns;
+}
+
+static inline void put_ipc_ns(struct ipc_namespace *ns)
+{
+}
+#endif
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index fef641af78c2..47879a874966 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -436,6 +436,13 @@ config UTS_NS
 	  In this namespace tasks see different info provided with the
 	  uname() system call
 
+config IPC_NS
+	bool "IPC namespace"
+	depends on NAMESPACES && SYSVIPC
+	help
+	  In this namespace tasks work with IPC ids which correspond to
+	  different IPC objects in different namespaces
+
 config BLK_DEV_INITRD
 	bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
 	depends on BROKEN || !FRV
diff --git a/ipc/Makefile b/ipc/Makefile
index b93bba6652f1..5fc5e33ea047 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o
 obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
 obj_mq-$(CONFIG_COMPAT) += compat_mq.o
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
+obj-$(CONFIG_IPC_NS) += namespace.o
 
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 79e24e878c1e..7f4235bed51b 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -14,6 +14,7 @@
 #include <linux/nsproxy.h>
 #include <linux/sysctl.h>
 #include <linux/uaccess.h>
+#include <linux/ipc_namespace.h>
 
 static void *get_ipc(ctl_table *table)
 {
diff --git a/ipc/msg.c b/ipc/msg.c
index ec0c724054b9..5879bfeb79ca 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -36,6 +36,7 @@
 #include <linux/seq_file.h>
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
+#include <linux/ipc_namespace.h>
 
 #include <asm/current.h>
 #include <asm/uaccess.h>
@@ -90,6 +91,7 @@ static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ipc_init_ids(ids);
 }
 
+#ifdef CONFIG_IPC_NS
 int msg_init_ns(struct ipc_namespace *ns)
 {
 	struct ipc_ids *ids;
@@ -128,6 +130,7 @@ void msg_exit_ns(struct ipc_namespace *ns)
 	kfree(ns->ids[IPC_MSG_IDS]);
 	ns->ids[IPC_MSG_IDS] = NULL;
 }
+#endif
 
 void __init msg_init(void)
 {
diff --git a/ipc/namespace.c b/ipc/namespace.c
new file mode 100644
index 000000000000..cef1139e6c96
--- /dev/null
+++ b/ipc/namespace.c
@@ -0,0 +1,73 @@
+/*
+ * linux/ipc/namespace.c
+ * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc.
+ */
+
+#include <linux/ipc.h>
+#include <linux/msg.h>
+#include <linux/ipc_namespace.h>
+#include <linux/rcupdate.h>
+#include <linux/nsproxy.h>
+#include <linux/slab.h>
+
+#include "util.h"
+
+static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+{
+	int err;
+	struct ipc_namespace *ns;
+
+	err = -ENOMEM;
+	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
+	if (ns == NULL)
+		goto err_mem;
+
+	err = sem_init_ns(ns);
+	if (err)
+		goto err_sem;
+	err = msg_init_ns(ns);
+	if (err)
+		goto err_msg;
+	err = shm_init_ns(ns);
+	if (err)
+		goto err_shm;
+
+	kref_init(&ns->kref);
+	return ns;
+
+err_shm:
+	msg_exit_ns(ns);
+err_msg:
+	sem_exit_ns(ns);
+err_sem:
+	kfree(ns);
+err_mem:
+	return ERR_PTR(err);
+}
+
+struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
+{
+	struct ipc_namespace *new_ns;
+
+	BUG_ON(!ns);
+	get_ipc_ns(ns);
+
+	if (!(flags & CLONE_NEWIPC))
+		return ns;
+
+	new_ns = clone_ipc_ns(ns);
+
+	put_ipc_ns(ns);
+	return new_ns;
+}
+
+void free_ipc_ns(struct kref *kref)
+{
+	struct ipc_namespace *ns;
+
+	ns = container_of(kref, struct ipc_namespace, kref);
+	sem_exit_ns(ns);
+	msg_exit_ns(ns);
+	shm_exit_ns(ns);
+	kfree(ns);
+}
diff --git a/ipc/sem.c b/ipc/sem.c
index d65e285b7e30..84c701fe5004 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -82,6 +82,7 @@
 #include <linux/seq_file.h>
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
+#include <linux/ipc_namespace.h>
 
 #include <asm/uaccess.h>
 #include "util.h"
@@ -128,6 +129,7 @@ static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ipc_init_ids(ids);
 }
 
+#ifdef CONFIG_IPC_NS
 int sem_init_ns(struct ipc_namespace *ns)
 {
 	struct ipc_ids *ids;
@@ -165,6 +167,7 @@ void sem_exit_ns(struct ipc_namespace *ns)
 	kfree(ns->ids[IPC_SEM_IDS]);
 	ns->ids[IPC_SEM_IDS] = NULL;
 }
+#endif
 
 void __init sem_init (void)
 {
diff --git a/ipc/shm.c b/ipc/shm.c
index 65c3a294aba5..07f4b7abc80a 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -38,6 +38,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
+#include <linux/ipc_namespace.h>
 
 #include <asm/uaccess.h>
 
@@ -96,6 +97,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		shm_destroy(ns, shp);
 }
 
+#ifdef CONFIG_IPC_NS
 int shm_init_ns(struct ipc_namespace *ns)
 {
 	struct ipc_ids *ids;
@@ -133,6 +135,7 @@ void shm_exit_ns(struct ipc_namespace *ns)
 	kfree(ns->ids[IPC_SHM_IDS]);
 	ns->ids[IPC_SHM_IDS] = NULL;
 }
+#endif
 
 void __init shm_init (void)
 {
diff --git a/ipc/util.c b/ipc/util.c
index 76c1f3461e22..5432b8e34c9b 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -33,6 +33,7 @@
 #include <linux/audit.h>
 #include <linux/nsproxy.h>
 #include <linux/rwsem.h>
+#include <linux/ipc_namespace.h>
 
 #include <asm/unistd.h>
 
@@ -51,66 +52,6 @@ struct ipc_namespace init_ipc_ns = {
 	},
 };
 
-static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
-{
-	int err;
-	struct ipc_namespace *ns;
-
-	err = -ENOMEM;
-	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
-	if (ns == NULL)
-		goto err_mem;
-
-	err = sem_init_ns(ns);
-	if (err)
-		goto err_sem;
-	err = msg_init_ns(ns);
-	if (err)
-		goto err_msg;
-	err = shm_init_ns(ns);
-	if (err)
-		goto err_shm;
-
-	kref_init(&ns->kref);
-	return ns;
-
-err_shm:
-	msg_exit_ns(ns);
-err_msg:
-	sem_exit_ns(ns);
-err_sem:
-	kfree(ns);
-err_mem:
-	return ERR_PTR(err);
-}
-
-struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
-{
-	struct ipc_namespace *new_ns;
-
-	BUG_ON(!ns);
-	get_ipc_ns(ns);
-
-	if (!(flags & CLONE_NEWIPC))
-		return ns;
-
-	new_ns = clone_ipc_ns(ns);
-
-	put_ipc_ns(ns);
-	return new_ns;
-}
-
-void free_ipc_ns(struct kref *kref)
-{
-	struct ipc_namespace *ns;
-
-	ns = container_of(kref, struct ipc_namespace, kref);
-	sem_exit_ns(ns);
-	msg_exit_ns(ns);
-	shm_exit_ns(ns);
-	kfree(ns);
-}
-
 /**
  *	ipc_init	-	initialise IPC subsystem
  *
diff --git a/ipc/util.h b/ipc/util.h
index 9ffea40457ce..fc6b7294f764 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -20,6 +20,8 @@ void sem_init (void);
 void msg_init (void);
 void shm_init (void);
 
+struct ipc_namespace;
+
 int sem_init_ns(struct ipc_namespace *ns);
 int msg_init_ns(struct ipc_namespace *ns);
 int shm_init_ns(struct ipc_namespace *ns);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 79f871bc0ef4..f5d332cf8c63 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -21,6 +21,7 @@
 #include <linux/utsname.h>
 #include <linux/pid_namespace.h>
 #include <net/net_namespace.h>
+#include <linux/ipc_namespace.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
-- 
cgit 


From 74bd59bb39eb08b4379e2590c5f160748d83f812 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 8 Feb 2008 04:18:24 -0800
Subject: namespaces: cleanup the code managed with PID_NS option

Just like with the user namespaces, move the namespace management code into
the separate .c file and mark the (already existing) PID_NS option as "depend
on NAMESPACES"

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h           |   2 +-
 include/linux/pid_namespace.h |   6 ++
 init/Kconfig                  |  24 ++---
 kernel/Makefile               |   1 +
 kernel/pid.c                  | 184 +--------------------------------------
 kernel/pid_namespace.c        | 197 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 220 insertions(+), 194 deletions(-)
 create mode 100644 kernel/pid_namespace.c

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index e29a900a8499..061abb6c0796 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -118,10 +118,10 @@ extern struct pid *find_pid(int nr);
  */
 extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
+int next_pidmap(struct pid_namespace *pid_ns, int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void FASTCALL(free_pid(struct pid *pid));
-extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
 
 /*
  * the helpers to get the pid's id seen from different namespaces
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 1689e28483e4..fcd61fa2c833 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -39,6 +39,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 
 extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
 extern void free_pid_ns(struct kref *kref);
+extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
 
 static inline void put_pid_ns(struct pid_namespace *ns)
 {
@@ -66,6 +67,11 @@ static inline void put_pid_ns(struct pid_namespace *ns)
 {
 }
 
+
+static inline void zap_pid_ns_processes(struct pid_namespace *ns)
+{
+	BUG();
+}
 #endif /* CONFIG_PID_NS */
 
 static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
diff --git a/init/Kconfig b/init/Kconfig
index 2a6499d6f283..455170e1c1e3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -214,18 +214,6 @@ config TASK_IO_ACCOUNTING
 
 	  Say N if unsure.
 
-config PID_NS
-	bool "PID Namespaces (EXPERIMENTAL)"
-	default n
-	depends on EXPERIMENTAL
-	help
-	  Suport process id namespaces.  This allows having multiple
-	  process with the same pid as long as they are in different
-	  pid namespaces.  This is a building block of containers.
-
-	  Unless you want to work with an experimental feature
-	  say N here.
-
 config AUDIT
 	bool "Auditing support"
 	depends on NET
@@ -442,6 +430,18 @@ config USER_NS
 	  to provide different user info for different servers.
 	  If unsure, say N.
 
+config PID_NS
+	bool "PID Namespaces (EXPERIMENTAL)"
+	default n
+	depends on NAMESPACES && EXPERIMENTAL
+	help
+	  Suport process id namespaces.  This allows having multiple
+	  process with the same pid as long as they are in different
+	  pid namespaces.  This is a building block of containers.
+
+	  Unless you want to work with an experimental feature
+	  say N here.
+
 config BLK_DEV_INITRD
 	bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
 	depends on BROKEN || !FRV
diff --git a/kernel/Makefile b/kernel/Makefile
index 30a957a35c91..60cd39c84e6d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_USER_NS) += user_namespace.o
+obj-$(CONFIG_PID_NS) += pid_namespace.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
diff --git a/kernel/pid.c b/kernel/pid.c
index 3b30bccdfcdc..939746fb4ce7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -41,7 +41,6 @@
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
 struct pid init_struct_pid = INIT_STRUCT_PID;
-static struct kmem_cache *pid_ns_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 
@@ -181,7 +180,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 	return -1;
 }
 
-static int next_pidmap(struct pid_namespace *pid_ns, int last)
+int next_pidmap(struct pid_namespace *pid_ns, int last)
 {
 	int offset;
 	struct pidmap *map, *end;
@@ -488,180 +487,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 }
 EXPORT_SYMBOL_GPL(find_get_pid);
 
-struct pid_cache {
-	int nr_ids;
-	char name[16];
-	struct kmem_cache *cachep;
-	struct list_head list;
-};
-
-static LIST_HEAD(pid_caches_lh);
-static DEFINE_MUTEX(pid_caches_mutex);
-
-/*
- * creates the kmem cache to allocate pids from.
- * @nr_ids: the number of numerical ids this pid will have to carry
- */
-
-static struct kmem_cache *create_pid_cachep(int nr_ids)
-{
-	struct pid_cache *pcache;
-	struct kmem_cache *cachep;
-
-	mutex_lock(&pid_caches_mutex);
-	list_for_each_entry (pcache, &pid_caches_lh, list)
-		if (pcache->nr_ids == nr_ids)
-			goto out;
-
-	pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
-	if (pcache == NULL)
-		goto err_alloc;
-
-	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
-	cachep = kmem_cache_create(pcache->name,
-			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
-			0, SLAB_HWCACHE_ALIGN, NULL);
-	if (cachep == NULL)
-		goto err_cachep;
-
-	pcache->nr_ids = nr_ids;
-	pcache->cachep = cachep;
-	list_add(&pcache->list, &pid_caches_lh);
-out:
-	mutex_unlock(&pid_caches_mutex);
-	return pcache->cachep;
-
-err_cachep:
-	kfree(pcache);
-err_alloc:
-	mutex_unlock(&pid_caches_mutex);
-	return NULL;
-}
-
-#ifdef CONFIG_PID_NS
-static struct pid_namespace *create_pid_namespace(int level)
-{
-	struct pid_namespace *ns;
-	int i;
-
-	ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
-	if (ns == NULL)
-		goto out;
-
-	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!ns->pidmap[0].page)
-		goto out_free;
-
-	ns->pid_cachep = create_pid_cachep(level + 1);
-	if (ns->pid_cachep == NULL)
-		goto out_free_map;
-
-	kref_init(&ns->kref);
-	ns->last_pid = 0;
-	ns->child_reaper = NULL;
-	ns->level = level;
-
-	set_bit(0, ns->pidmap[0].page);
-	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
-
-	for (i = 1; i < PIDMAP_ENTRIES; i++) {
-		ns->pidmap[i].page = 0;
-		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
-	}
-
-	return ns;
-
-out_free_map:
-	kfree(ns->pidmap[0].page);
-out_free:
-	kmem_cache_free(pid_ns_cachep, ns);
-out:
-	return ERR_PTR(-ENOMEM);
-}
-
-static void destroy_pid_namespace(struct pid_namespace *ns)
-{
-	int i;
-
-	for (i = 0; i < PIDMAP_ENTRIES; i++)
-		kfree(ns->pidmap[i].page);
-	kmem_cache_free(pid_ns_cachep, ns);
-}
-
-struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
-{
-	struct pid_namespace *new_ns;
-
-	BUG_ON(!old_ns);
-	new_ns = get_pid_ns(old_ns);
-	if (!(flags & CLONE_NEWPID))
-		goto out;
-
-	new_ns = ERR_PTR(-EINVAL);
-	if (flags & CLONE_THREAD)
-		goto out_put;
-
-	new_ns = create_pid_namespace(old_ns->level + 1);
-	if (!IS_ERR(new_ns))
-		new_ns->parent = get_pid_ns(old_ns);
-
-out_put:
-	put_pid_ns(old_ns);
-out:
-	return new_ns;
-}
-
-void free_pid_ns(struct kref *kref)
-{
-	struct pid_namespace *ns, *parent;
-
-	ns = container_of(kref, struct pid_namespace, kref);
-
-	parent = ns->parent;
-	destroy_pid_namespace(ns);
-
-	if (parent != NULL)
-		put_pid_ns(parent);
-}
-#endif /* CONFIG_PID_NS */
-
-void zap_pid_ns_processes(struct pid_namespace *pid_ns)
-{
-	int nr;
-	int rc;
-
-	/*
-	 * The last thread in the cgroup-init thread group is terminating.
-	 * Find remaining pid_ts in the namespace, signal and wait for them
-	 * to exit.
-	 *
-	 * Note:  This signals each threads in the namespace - even those that
-	 * 	  belong to the same thread group, To avoid this, we would have
-	 * 	  to walk the entire tasklist looking a processes in this
-	 * 	  namespace, but that could be unnecessarily expensive if the
-	 * 	  pid namespace has just a few processes. Or we need to
-	 * 	  maintain a tasklist for each pid namespace.
-	 *
-	 */
-	read_lock(&tasklist_lock);
-	nr = next_pidmap(pid_ns, 1);
-	while (nr > 0) {
-		kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
-		nr = next_pidmap(pid_ns, nr);
-	}
-	read_unlock(&tasklist_lock);
-
-	do {
-		clear_thread_flag(TIF_SIGPENDING);
-		rc = sys_wait4(-1, NULL, __WALL, NULL);
-	} while (rc != -ECHILD);
-
-
-	/* Child reaper for the pid namespace is going away */
-	pid_ns->child_reaper = NULL;
-	return;
-}
-
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -694,9 +519,6 @@ void __init pidmap_init(void)
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
-	init_pid_ns.pid_cachep = create_pid_cachep(1);
-	if (init_pid_ns.pid_cachep == NULL)
-		panic("Can't create pid_1 cachep\n");
-
-	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
new file mode 100644
index 000000000000..6d792b66d854
--- /dev/null
+++ b/kernel/pid_namespace.c
@@ -0,0 +1,197 @@
+/*
+ * Pid namespaces
+ *
+ * Authors:
+ *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
+ *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
+ *     Many thanks to Oleg Nesterov for comments and help
+ *
+ */
+
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
+#include <linux/syscalls.h>
+#include <linux/err.h>
+
+#define BITS_PER_PAGE		(PAGE_SIZE*8)
+
+struct pid_cache {
+	int nr_ids;
+	char name[16];
+	struct kmem_cache *cachep;
+	struct list_head list;
+};
+
+static LIST_HEAD(pid_caches_lh);
+static DEFINE_MUTEX(pid_caches_mutex);
+static struct kmem_cache *pid_ns_cachep;
+
+/*
+ * creates the kmem cache to allocate pids from.
+ * @nr_ids: the number of numerical ids this pid will have to carry
+ */
+
+static struct kmem_cache *create_pid_cachep(int nr_ids)
+{
+	struct pid_cache *pcache;
+	struct kmem_cache *cachep;
+
+	mutex_lock(&pid_caches_mutex);
+	list_for_each_entry(pcache, &pid_caches_lh, list)
+		if (pcache->nr_ids == nr_ids)
+			goto out;
+
+	pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
+	if (pcache == NULL)
+		goto err_alloc;
+
+	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
+	cachep = kmem_cache_create(pcache->name,
+			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
+			0, SLAB_HWCACHE_ALIGN, NULL);
+	if (cachep == NULL)
+		goto err_cachep;
+
+	pcache->nr_ids = nr_ids;
+	pcache->cachep = cachep;
+	list_add(&pcache->list, &pid_caches_lh);
+out:
+	mutex_unlock(&pid_caches_mutex);
+	return pcache->cachep;
+
+err_cachep:
+	kfree(pcache);
+err_alloc:
+	mutex_unlock(&pid_caches_mutex);
+	return NULL;
+}
+
+static struct pid_namespace *create_pid_namespace(int level)
+{
+	struct pid_namespace *ns;
+	int i;
+
+	ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+	if (ns == NULL)
+		goto out;
+
+	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!ns->pidmap[0].page)
+		goto out_free;
+
+	ns->pid_cachep = create_pid_cachep(level + 1);
+	if (ns->pid_cachep == NULL)
+		goto out_free_map;
+
+	kref_init(&ns->kref);
+	ns->last_pid = 0;
+	ns->child_reaper = NULL;
+	ns->level = level;
+
+	set_bit(0, ns->pidmap[0].page);
+	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
+
+	for (i = 1; i < PIDMAP_ENTRIES; i++) {
+		ns->pidmap[i].page = 0;
+		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
+	}
+
+	return ns;
+
+out_free_map:
+	kfree(ns->pidmap[0].page);
+out_free:
+	kmem_cache_free(pid_ns_cachep, ns);
+out:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void destroy_pid_namespace(struct pid_namespace *ns)
+{
+	int i;
+
+	for (i = 0; i < PIDMAP_ENTRIES; i++)
+		kfree(ns->pidmap[i].page);
+	kmem_cache_free(pid_ns_cachep, ns);
+}
+
+struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
+{
+	struct pid_namespace *new_ns;
+
+	BUG_ON(!old_ns);
+	new_ns = get_pid_ns(old_ns);
+	if (!(flags & CLONE_NEWPID))
+		goto out;
+
+	new_ns = ERR_PTR(-EINVAL);
+	if (flags & CLONE_THREAD)
+		goto out_put;
+
+	new_ns = create_pid_namespace(old_ns->level + 1);
+	if (!IS_ERR(new_ns))
+		new_ns->parent = get_pid_ns(old_ns);
+
+out_put:
+	put_pid_ns(old_ns);
+out:
+	return new_ns;
+}
+
+void free_pid_ns(struct kref *kref)
+{
+	struct pid_namespace *ns, *parent;
+
+	ns = container_of(kref, struct pid_namespace, kref);
+
+	parent = ns->parent;
+	destroy_pid_namespace(ns);
+
+	if (parent != NULL)
+		put_pid_ns(parent);
+}
+
+void zap_pid_ns_processes(struct pid_namespace *pid_ns)
+{
+	int nr;
+	int rc;
+
+	/*
+	 * The last thread in the cgroup-init thread group is terminating.
+	 * Find remaining pid_ts in the namespace, signal and wait for them
+	 * to exit.
+	 *
+	 * Note:  This signals each threads in the namespace - even those that
+	 * 	  belong to the same thread group, To avoid this, we would have
+	 * 	  to walk the entire tasklist looking a processes in this
+	 * 	  namespace, but that could be unnecessarily expensive if the
+	 * 	  pid namespace has just a few processes. Or we need to
+	 * 	  maintain a tasklist for each pid namespace.
+	 *
+	 */
+	read_lock(&tasklist_lock);
+	nr = next_pidmap(pid_ns, 1);
+	while (nr > 0) {
+		kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
+		nr = next_pidmap(pid_ns, nr);
+	}
+	read_unlock(&tasklist_lock);
+
+	do {
+		clear_thread_flag(TIF_SIGPENDING);
+		rc = sys_wait4(-1, NULL, __WALL, NULL);
+	} while (rc != -ECHILD);
+
+
+	/* Child reaper for the pid namespace is going away */
+	pid_ns->child_reaper = NULL;
+	return;
+}
+
+static __init int pid_namespaces_init(void)
+{
+	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+	return 0;
+}
+
+__initcall(pid_namespaces_init);
-- 
cgit 


From be614086a4aff163d5aa0dc160638d1193b59cde Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 Feb 2008 04:18:30 -0800
Subject: proc: implement proc_single_file_operations

Currently many /proc/pid files use a crufty precursor to the current seq_file
api, and they don't have direct access to the pid_namespace or the pid of for
which they are displaying data.

So implement proc_single_file_operations to make the seq_file routines easy to
use, and to give access to the full state of the pid of we are displaying data
for.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c          | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/proc_fs.h |  3 +++
 2 files changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index c59852b38787..f4b1e14bd95b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -121,6 +121,10 @@ struct pid_entry {
 	NOD(NAME, (S_IFREG|(MODE)), 			\
 		NULL, &proc_info_file_operations,	\
 		{ .proc_read = &proc_##OTYPE } )
+#define ONE(NAME, MODE, OTYPE)				\
+	NOD(NAME, (S_IFREG|(MODE)), 			\
+		NULL, &proc_single_file_operations,	\
+		{ .proc_show = &proc_##OTYPE } )
 
 int maps_protect;
 EXPORT_SYMBOL(maps_protect);
@@ -658,6 +662,45 @@ static const struct file_operations proc_info_file_operations = {
 	.read		= proc_info_read,
 };
 
+static int proc_single_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct pid_namespace *ns;
+	struct pid *pid;
+	struct task_struct *task;
+	int ret;
+
+	ns = inode->i_sb->s_fs_info;
+	pid = proc_pid(inode);
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task)
+		return -ESRCH;
+
+	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
+
+	put_task_struct(task);
+	return ret;
+}
+
+static int proc_single_open(struct inode *inode, struct file *filp)
+{
+	int ret;
+	ret = single_open(filp, proc_single_show, NULL);
+	if (!ret) {
+		struct seq_file *m = filp->private_data;
+
+		m->private = inode;
+	}
+	return ret;
+}
+
+static const struct file_operations proc_single_file_operations = {
+	.open		= proc_single_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int mem_open(struct inode* inode, struct file* file)
 {
 	file->private_data = (void*)((long)current->self_exec_id);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index e43551516831..b04ebf09fb91 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -262,6 +262,9 @@ extern void kclist_add(struct kcore_list *, void *, size_t);
 union proc_op {
 	int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
 	int (*proc_read)(struct task_struct *task, char *page);
+	int (*proc_show)(struct seq_file *m,
+		struct pid_namespace *ns, struct pid *pid,
+		struct task_struct *task);
 };
 
 struct proc_inode {
-- 
cgit 


From df5f8314ca30d6a76735748e5ba4ca9809c0f434 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 Feb 2008 04:18:33 -0800
Subject: proc: seqfile convert proc_pid_status to properly handle pid
 namespaces

Currently we possibly lookup the pid in the wrong pid namespace.  So
seq_file convert proc_pid_status which ensures the proper pid namespaces is
passed in.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: another build fix]
[akpm@linux-foundation.org: s390 build fix]
[akpm@linux-foundation.org: fix task_name() output]
[akpm@linux-foundation.org: fix nommu build]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andrew Morgan <morgan@kernel.org>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/kernel/traps.c     |  24 ++++----
 fs/proc/array.c              | 128 +++++++++++++++++++++----------------------
 fs/proc/base.c               |   4 +-
 fs/proc/internal.h           |   3 +-
 fs/proc/task_mmu.c           |   6 +-
 fs/proc/task_nommu.c         |   5 +-
 include/asm-s390/processor.h |   3 +-
 include/linux/cpuset.h       |   9 +--
 include/linux/proc_fs.h      |   3 +-
 kernel/cpuset.c              |  19 ++++---
 10 files changed, 103 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 1a2fdb6991df..a4d29025ddbd 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -24,6 +24,7 @@
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/seq_file.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
@@ -218,41 +219,40 @@ void show_registers(struct pt_regs *regs)
 }	
 
 /* This is called from fs/proc/array.c */
-char *task_show_regs(struct task_struct *task, char *buffer)
+void task_show_regs(struct seq_file *m, struct task_struct *task)
 {
 	struct pt_regs *regs;
 
 	regs = task_pt_regs(task);
-	buffer += sprintf(buffer, "task: %p, ksp: %p\n",
+	seq_printf(m, "task: %p, ksp: %p\n",
 		       task, (void *)task->thread.ksp);
-	buffer += sprintf(buffer, "User PSW : %p %p\n",
+	seq_printf(m, "User PSW : %p %p\n",
 		       (void *) regs->psw.mask, (void *)regs->psw.addr);
 
-	buffer += sprintf(buffer, "User GPRS: " FOURLONG,
+	seq_printf(m, "User GPRS: " FOURLONG,
 			  regs->gprs[0], regs->gprs[1],
 			  regs->gprs[2], regs->gprs[3]);
-	buffer += sprintf(buffer, "           " FOURLONG,
+	seq_printf(m, "           " FOURLONG,
 			  regs->gprs[4], regs->gprs[5],
 			  regs->gprs[6], regs->gprs[7]);
-	buffer += sprintf(buffer, "           " FOURLONG,
+	seq_printf(m, "           " FOURLONG,
 			  regs->gprs[8], regs->gprs[9],
 			  regs->gprs[10], regs->gprs[11]);
-	buffer += sprintf(buffer, "           " FOURLONG,
+	seq_printf(m, "           " FOURLONG,
 			  regs->gprs[12], regs->gprs[13],
 			  regs->gprs[14], regs->gprs[15]);
-	buffer += sprintf(buffer, "User ACRS: %08x %08x %08x %08x\n",
+	seq_printf(m, "User ACRS: %08x %08x %08x %08x\n",
 			  task->thread.acrs[0], task->thread.acrs[1],
 			  task->thread.acrs[2], task->thread.acrs[3]);
-	buffer += sprintf(buffer, "           %08x %08x %08x %08x\n",
+	seq_printf(m, "           %08x %08x %08x %08x\n",
 			  task->thread.acrs[4], task->thread.acrs[5],
 			  task->thread.acrs[6], task->thread.acrs[7]);
-	buffer += sprintf(buffer, "           %08x %08x %08x %08x\n",
+	seq_printf(m, "           %08x %08x %08x %08x\n",
 			  task->thread.acrs[8], task->thread.acrs[9],
 			  task->thread.acrs[10], task->thread.acrs[11]);
-	buffer += sprintf(buffer, "           %08x %08x %08x %08x\n",
+	seq_printf(m, "           %08x %08x %08x %08x\n",
 			  task->thread.acrs[12], task->thread.acrs[13],
 			  task->thread.acrs[14], task->thread.acrs[15]);
-	return buffer;
 }
 
 static DEFINE_SPINLOCK(die_lock);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5540e9575c6d..07d6c4853fe8 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -89,18 +89,21 @@
 do { memcpy(buffer, string, strlen(string)); \
      buffer += strlen(string); } while (0)
 
-static inline char *task_name(struct task_struct *p, char *buf)
+static inline void task_name(struct seq_file *m, struct task_struct *p)
 {
 	int i;
+	char *buf, *end;
 	char *name;
 	char tcomm[sizeof(p->comm)];
 
 	get_task_comm(tcomm, p);
 
-	ADDBUF(buf, "Name:\t");
+	seq_printf(m, "Name:\t");
+	end = m->buf + m->size;
+	buf = m->buf + m->count;
 	name = tcomm;
 	i = sizeof(tcomm);
-	do {
+	while (i && (buf < end)) {
 		unsigned char c = *name;
 		name++;
 		i--;
@@ -108,20 +111,21 @@ static inline char *task_name(struct task_struct *p, char *buf)
 		if (!c)
 			break;
 		if (c == '\\') {
-			buf[1] = c;
-			buf += 2;
+			buf++;
+			if (buf < end)
+				*buf++ = c;
 			continue;
 		}
 		if (c == '\n') {
-			buf[0] = '\\';
-			buf[1] = 'n';
-			buf += 2;
+			*buf++ = '\\';
+			if (buf < end)
+				*buf++ = 'n';
 			continue;
 		}
 		buf++;
-	} while (i);
-	*buf = '\n';
-	return buf+1;
+	}
+	m->count = buf - m->buf;
+	seq_printf(m, "\n");
 }
 
 /*
@@ -152,21 +156,20 @@ static inline const char *get_task_state(struct task_struct *tsk)
 	return *p;
 }
 
-static inline char *task_state(struct task_struct *p, char *buffer)
+static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
+				struct pid *pid, struct task_struct *p)
 {
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
-	struct pid_namespace *ns;
 	pid_t ppid, tpid;
 
-	ns = current->nsproxy->pid_ns;
 	rcu_read_lock();
 	ppid = pid_alive(p) ?
 		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
 	tpid = pid_alive(p) && p->ptrace ?
 		task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
-	buffer += sprintf(buffer,
+	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
 		"Pid:\t%d\n"
@@ -176,7 +179,7 @@ static inline char *task_state(struct task_struct *p, char *buffer)
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
 		task_tgid_nr_ns(p, ns),
-		task_pid_nr_ns(p, ns),
+		pid_nr_ns(pid, ns),
 		ppid, tpid,
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
@@ -184,7 +187,7 @@ static inline char *task_state(struct task_struct *p, char *buffer)
 	task_lock(p);
 	if (p->files)
 		fdt = files_fdtable(p->files);
-	buffer += sprintf(buffer,
+	seq_printf(m,
 		"FDSize:\t%d\n"
 		"Groups:\t",
 		fdt ? fdt->max_fds : 0);
@@ -195,20 +198,18 @@ static inline char *task_state(struct task_struct *p, char *buffer)
 	task_unlock(p);
 
 	for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
-		buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g));
+		seq_printf(m, "%d ", GROUP_AT(group_info, g));
 	put_group_info(group_info);
 
-	buffer += sprintf(buffer, "\n");
-	return buffer;
+	seq_printf(m, "\n");
 }
 
-static char *render_sigset_t(const char *header, sigset_t *set, char *buffer)
+static void render_sigset_t(struct seq_file *m, const char *header,
+				sigset_t *set)
 {
-	int i, len;
+	int i;
 
-	len = strlen(header);
-	memcpy(buffer, header, len);
-	buffer += len;
+	seq_printf(m, "%s", header);
 
 	i = _NSIG;
 	do {
@@ -219,12 +220,10 @@ static char *render_sigset_t(const char *header, sigset_t *set, char *buffer)
 		if (sigismember(set, i+2)) x |= 2;
 		if (sigismember(set, i+3)) x |= 4;
 		if (sigismember(set, i+4)) x |= 8;
-		*buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
+		seq_printf(m, "%x", x);
 	} while (i >= 4);
 
-	*buffer++ = '\n';
-	*buffer = 0;
-	return buffer;
+	seq_printf(m, "\n");
 }
 
 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
@@ -242,7 +241,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
 	}
 }
 
-static inline char *task_sig(struct task_struct *p, char *buffer)
+static inline void task_sig(struct seq_file *m, struct task_struct *p)
 {
 	unsigned long flags;
 	sigset_t pending, shpending, blocked, ignored, caught;
@@ -269,67 +268,66 @@ static inline char *task_sig(struct task_struct *p, char *buffer)
 	}
 	rcu_read_unlock();
 
-	buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
-	buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
+	seq_printf(m, "Threads:\t%d\n", num_threads);
+	seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
 
 	/* render them all */
-	buffer = render_sigset_t("SigPnd:\t", &pending, buffer);
-	buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer);
-	buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
-	buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
-	buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
-
-	return buffer;
+	render_sigset_t(m, "SigPnd:\t", &pending);
+	render_sigset_t(m, "ShdPnd:\t", &shpending);
+	render_sigset_t(m, "SigBlk:\t", &blocked);
+	render_sigset_t(m, "SigIgn:\t", &ignored);
+	render_sigset_t(m, "SigCgt:\t", &caught);
 }
 
-static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer)
+static void render_cap_t(struct seq_file *m, const char *header,
+			kernel_cap_t *a)
 {
 	unsigned __capi;
 
-	buffer += sprintf(buffer, "%s", header);
+	seq_printf(m, "%s", header);
 	CAP_FOR_EACH_U32(__capi) {
-		buffer += sprintf(buffer, "%08x",
-				  a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
+		seq_printf(m, "%08x",
+			   a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
 	}
-	return buffer + sprintf(buffer, "\n");
+	seq_printf(m, "\n");
 }
 
-static inline char *task_cap(struct task_struct *p, char *buffer)
+static inline void task_cap(struct seq_file *m, struct task_struct *p)
 {
-	buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer);
-	buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer);
-	return render_cap_t("CapEff:\t", &p->cap_effective, buffer);
+	render_cap_t(m, "CapInh:\t", &p->cap_inheritable);
+	render_cap_t(m, "CapPrm:\t", &p->cap_permitted);
+	render_cap_t(m, "CapEff:\t", &p->cap_effective);
 }
 
-static inline char *task_context_switch_counts(struct task_struct *p,
-						char *buffer)
+static inline void task_context_switch_counts(struct seq_file *m,
+						struct task_struct *p)
 {
-	return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
-			    "nonvoluntary_ctxt_switches:\t%lu\n",
-			    p->nvcsw,
-			    p->nivcsw);
+	seq_printf(m,	"voluntary_ctxt_switches:\t%lu\n"
+			"nonvoluntary_ctxt_switches:\t%lu\n",
+			p->nvcsw,
+			p->nivcsw);
 }
 
-int proc_pid_status(struct task_struct *task, char *buffer)
+int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
+			struct pid *pid, struct task_struct *task)
 {
-	char *orig = buffer;
 	struct mm_struct *mm = get_task_mm(task);
 
-	buffer = task_name(task, buffer);
-	buffer = task_state(task, buffer);
+	task_name(m, task);
+	task_state(m, ns, pid, task);
 
 	if (mm) {
-		buffer = task_mem(mm, buffer);
+		task_mem(m, mm);
 		mmput(mm);
 	}
-	buffer = task_sig(task, buffer);
-	buffer = task_cap(task, buffer);
-	buffer = cpuset_task_status_allowed(task, buffer);
+	task_sig(m, task);
+	task_cap(m, task);
+	cpuset_task_status_allowed(m, task);
 #if defined(CONFIG_S390)
-	buffer = task_show_regs(task, buffer);
+	task_show_regs(m, task);
 #endif
-	buffer = task_context_switch_counts(task, buffer);
-	return buffer - orig;
+	task_context_switch_counts(m, task);
+	return 0;
 }
 
 /*
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9c3e548a6754..8a19a8a1a3e6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2274,7 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
 	REG("environ",    S_IRUSR, environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
-	INF("status",     S_IRUGO, pid_status),
+	ONE("status",     S_IRUGO, pid_status),
 	INF("limits",	  S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
@@ -2605,7 +2605,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	DIR("fdinfo",    S_IRUSR|S_IXUSR, fdinfo),
 	REG("environ",   S_IRUSR, environ),
 	INF("auxv",      S_IRUSR, pid_auxv),
-	INF("status",    S_IRUGO, pid_status),
+	ONE("status",    S_IRUGO, pid_status),
 	INF("limits",	 S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 45bdbfc704e7..ea496ffeabe7 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -53,7 +53,8 @@ extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *task);
 extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *task);
-extern int proc_pid_status(struct task_struct *, char *);
+extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
+				struct pid *pid, struct task_struct *task);
 extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *task);
 extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 38338ed98cc6..a34c440f8d25 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -9,13 +9,14 @@
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/seq_file.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
-char *task_mem(struct mm_struct *mm, char *buffer)
+void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
 	unsigned long data, text, lib;
 	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
@@ -37,7 +38,7 @@ char *task_mem(struct mm_struct *mm, char *buffer)
 	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
-	buffer += sprintf(buffer,
+	seq_printf(m,
 		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
 		"VmLck:\t%8lu kB\n"
@@ -56,7 +57,6 @@ char *task_mem(struct mm_struct *mm, char *buffer)
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
 		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
-	return buffer;
 }
 
 unsigned long task_vsize(struct mm_struct *mm)
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 1932c2ca3457..cee0231c6cec 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -12,7 +12,7 @@
  * each process that owns it. Non-shared memory is counted
  * accurately.
  */
-char *task_mem(struct mm_struct *mm, char *buffer)
+void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
 	struct vm_list_struct *vml;
 	unsigned long bytes = 0, sbytes = 0, slack = 0;
@@ -58,14 +58,13 @@ char *task_mem(struct mm_struct *mm, char *buffer)
 
 	bytes += kobjsize(current); /* includes kernel stack */
 
-	buffer += sprintf(buffer,
+	seq_printf(m,
 		"Mem:\t%8lu bytes\n"
 		"Slack:\t%8lu bytes\n"
 		"Shared:\t%8lu bytes\n",
 		bytes, slack, sbytes);
 
 	up_read(&mm->mmap_sem);
-	return buffer;
 }
 
 unsigned long task_vsize(struct mm_struct *mm)
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 4f744609cd11..dfec2d011c0d 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -161,6 +161,7 @@ struct stack_frame {
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
+struct seq_file;
 
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
@@ -177,7 +178,7 @@ extern unsigned long thread_saved_pc(struct task_struct *t);
 /*
  * Print register of task into buffer. Used in fs/proc/array.c.
  */
-extern char *task_show_regs(struct task_struct *task, char *buffer);
+extern void task_show_regs(struct seq_file *m, struct task_struct *task);
 
 extern void show_registers(struct pt_regs *regs);
 extern void show_code(struct pt_regs *regs);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ecae585ec3da..f8c9a2752f06 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -57,7 +57,9 @@ extern int cpuset_memory_pressure_enabled;
 extern void __cpuset_memory_pressure_bump(void);
 
 extern const struct file_operations proc_cpuset_operations;
-extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
+struct seq_file;
+extern void cpuset_task_status_allowed(struct seq_file *m,
+					struct task_struct *task);
 
 extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
@@ -126,10 +128,9 @@ static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
 
 static inline void cpuset_memory_pressure_bump(void) {}
 
-static inline char *cpuset_task_status_allowed(struct task_struct *task,
-							char *buffer)
+static inline void cpuset_task_status_allowed(struct seq_file *m,
+						struct task_struct *task)
 {
-	return buffer;
 }
 
 static inline void cpuset_lock(void) {}
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index b04ebf09fb91..e4a8f9aef188 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -118,7 +118,8 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
-char *task_mem(struct mm_struct *, char *);
+void task_mem(struct seq_file *, struct mm_struct *);
+void clear_refs_smap(struct mm_struct *mm);
 
 struct proc_dir_entry *de_get(struct proc_dir_entry *de);
 void de_put(struct proc_dir_entry *de);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 67b2bfe27814..3e296ed81d4d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2255,13 +2255,14 @@ const struct file_operations proc_cpuset_operations = {
 #endif /* CONFIG_PROC_PID_CPUSET */
 
 /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
-char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
-{
-	buffer += sprintf(buffer, "Cpus_allowed:\t");
-	buffer += cpumask_scnprintf(buffer, PAGE_SIZE, task->cpus_allowed);
-	buffer += sprintf(buffer, "\n");
-	buffer += sprintf(buffer, "Mems_allowed:\t");
-	buffer += nodemask_scnprintf(buffer, PAGE_SIZE, task->mems_allowed);
-	buffer += sprintf(buffer, "\n");
-	return buffer;
+void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
+{
+	seq_printf(m, "Cpus_allowed:\t");
+	m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
+					task->cpus_allowed);
+	seq_printf(m, "\n");
+	seq_printf(m, "Mems_allowed:\t");
+	m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
+					task->mems_allowed);
+	seq_printf(m, "\n");
 }
-- 
cgit 


From 2d3a4e3666325a9709cc8ea2e88151394e8f20fc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@sw.ru>
Date: Fri, 8 Feb 2008 04:18:37 -0800
Subject: proc: fix ->open'less usage due to ->proc_fops flip

Typical PDE creation code looks like:

	pde = create_proc_entry("foo", 0, NULL);
	if (pde)
		pde->proc_fops = &foo_proc_fops;

Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
   possible to ->read without ->open (see one class of oopses below).

The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:

	pde = proc_create("foo", 0, NULL, &foo_proc_fops);
	if (!pde)
		return -ENOMEM;

Fix most networking users for a start.

In the long run, create_proc_entry() for regular files will go.

BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom

Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
       00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
       00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
 [<c106f7ce>] seq_read+0x24/0x28a
 [<c106f7aa>] seq_read+0x0/0x28a
 [<c106f7ce>] seq_read+0x24/0x28a
 [<c106f7aa>] seq_read+0x0/0x28a
 [<c10818b8>] proc_reg_read+0x60/0x73
 [<c1081858>] proc_reg_read+0x0/0x73
 [<c105a34f>] vfs_read+0x6c/0x8b
 [<c105a6f3>] sys_read+0x3c/0x63
 [<c10025f2>] sysenter_past_esp+0x5f/0xa5
 [<c10697a7>] destroy_inode+0x24/0x33
 =======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c       | 40 ++++++++++++++++++++++++++++++++++++----
 fs/proc/proc_net.c      |  7 +------
 fs/proc/root.c          |  1 +
 include/linux/proc_fs.h | 10 +++++++++-
 4 files changed, 47 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b9dd3628d43a..68971e66cd41 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -562,7 +562,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 	return 0;
 }
 
-static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
+static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 					  const char *name,
 					  mode_t mode,
 					  nlink_t nlink)
@@ -605,7 +605,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
 {
 	struct proc_dir_entry *ent;
 
-	ent = proc_create(&parent,name,
+	ent = __proc_create(&parent, name,
 			  (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
 
 	if (ent) {
@@ -630,7 +630,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
 {
 	struct proc_dir_entry *ent;
 
-	ent = proc_create(&parent, name, S_IFDIR | mode, 2);
+	ent = __proc_create(&parent, name, S_IFDIR | mode, 2);
 	if (ent) {
 		if (proc_register(parent, ent) < 0) {
 			kfree(ent);
@@ -664,7 +664,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 		nlink = 1;
 	}
 
-	ent = proc_create(&parent,name,mode,nlink);
+	ent = __proc_create(&parent, name, mode, nlink);
 	if (ent) {
 		if (proc_register(parent, ent) < 0) {
 			kfree(ent);
@@ -674,6 +674,38 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 	return ent;
 }
 
+struct proc_dir_entry *proc_create(const char *name, mode_t mode,
+				   struct proc_dir_entry *parent,
+				   const struct file_operations *proc_fops)
+{
+	struct proc_dir_entry *pde;
+	nlink_t nlink;
+
+	if (S_ISDIR(mode)) {
+		if ((mode & S_IALLUGO) == 0)
+			mode |= S_IRUGO | S_IXUGO;
+		nlink = 2;
+	} else {
+		if ((mode & S_IFMT) == 0)
+			mode |= S_IFREG;
+		if ((mode & S_IALLUGO) == 0)
+			mode |= S_IRUGO;
+		nlink = 1;
+	}
+
+	pde = __proc_create(&parent, name, mode, nlink);
+	if (!pde)
+		goto out;
+	pde->proc_fops = proc_fops;
+	if (proc_register(parent, pde) < 0)
+		goto out_free;
+	return pde;
+out_free:
+	kfree(pde);
+out:
+	return NULL;
+}
+
 void free_proc_entry(struct proc_dir_entry *de)
 {
 	unsigned int ino = de->low_ino;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4823c9677fac..14e9b5aaf863 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -67,12 +67,7 @@ EXPORT_SYMBOL_GPL(seq_release_net);
 struct proc_dir_entry *proc_net_fops_create(struct net *net,
 	const char *name, mode_t mode, const struct file_operations *fops)
 {
-	struct proc_dir_entry *res;
-
-	res = create_proc_entry(name, mode, net->proc_net);
-	if (res)
-		res->proc_fops = fops;
-	return res;
+	return proc_create(name, mode, net->proc_net, fops);
 }
 EXPORT_SYMBOL_GPL(proc_net_fops_create);
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 81f99e691f99..ef0fb57fc9ef 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -232,6 +232,7 @@ void pid_ns_release_proc(struct pid_namespace *ns)
 EXPORT_SYMBOL(proc_symlink);
 EXPORT_SYMBOL(proc_mkdir);
 EXPORT_SYMBOL(create_proc_entry);
+EXPORT_SYMBOL(proc_create);
 EXPORT_SYMBOL(remove_proc_entry);
 EXPORT_SYMBOL(proc_root);
 EXPORT_SYMBOL(proc_root_fs);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index e4a8f9aef188..d6a4f69bdc92 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -126,6 +126,9 @@ void de_put(struct proc_dir_entry *de);
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
+struct proc_dir_entry *proc_create(const char *name, mode_t mode,
+				struct proc_dir_entry *parent,
+				const struct file_operations *proc_fops);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
@@ -220,7 +223,12 @@ static inline void proc_flush_task(struct task_struct *task)
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
-
+static inline struct proc_dir_entry *proc_create(const char *name,
+	mode_t mode, struct proc_dir_entry *parent,
+	const struct file_operations *proc_fops)
+{
+	return NULL;
+}
 #define remove_proc_entry(name, parent) do {} while (0)
 
 static inline struct proc_dir_entry *proc_symlink(const char *name,
-- 
cgit 


From d94afc6ccf6690b30ae112ec8101b3f10d50114e Mon Sep 17 00:00:00 2001
From: mark gross <mgross@linux.intel.com>
Date: Fri, 8 Feb 2008 04:18:39 -0800
Subject: intel-iommu: fault_reason index cleanup

Fix an off by one bug in the fault reason string reporting function, and
clean up some of the code around this buglet.

[akpm@linux-foundation.org: cleanup]
Signed-off-by: mark gross <mgross@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/intel-iommu.c | 11 +++++------
 include/linux/dmar.h      |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 585e188c1746..a4c3089f892a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -745,7 +745,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
 
 /* iommu interrupt handling. Most stuff are MSI-like. */
 
-static char *fault_reason_strings[] =
+static const char *fault_reason_strings[] =
 {
 	"Software",
 	"Present bit in root entry is clear",
@@ -760,14 +760,13 @@ static char *fault_reason_strings[] =
 	"non-zero reserved fields in RTP",
 	"non-zero reserved fields in CTP",
 	"non-zero reserved fields in PTE",
-	"Unknown"
 };
 #define MAX_FAULT_REASON_IDX 	(ARRAY_SIZE(fault_reason_strings) - 1)
 
-char *dmar_get_fault_reason(u8 fault_reason)
+const char *dmar_get_fault_reason(u8 fault_reason)
 {
-	if (fault_reason >= MAX_FAULT_REASON_IDX)
-		return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];
+	if (fault_reason > MAX_FAULT_REASON_IDX)
+		return "Unknown";
 	else
 		return fault_reason_strings[fault_reason];
 }
@@ -825,7 +824,7 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
 		u8 fault_reason, u16 source_id, u64 addr)
 {
-	char *reason;
+	const char *reason;
 
 	reason = dmar_get_fault_reason(fault_reason);
 
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index ffb6439cb5e6..56c73b847551 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -28,7 +28,7 @@
 #ifdef CONFIG_DMAR
 struct intel_iommu;
 
-extern char *dmar_get_fault_reason(u8 fault_reason);
+extern const char *dmar_get_fault_reason(u8 fault_reason);
 
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
-- 
cgit 


From 92dfc9dc7ba63134f721b6e745dbdcfc13ea341b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 8 Feb 2008 04:18:43 -0800
Subject: fix "modules: make module_address_lookup() safe"

Get the constness right, avoid nasty cast.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/module.h | 4 ++--
 kernel/module.c        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index ac481e2094fd..ac28e8761e84 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -449,7 +449,7 @@ static inline void __module_get(struct module *module)
 /* For kallsyms to ask for address resolution.  namebuf should be at
  * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
  * found, otherwise NULL. */
-char *module_address_lookup(unsigned long addr,
+const char *module_address_lookup(unsigned long addr,
 			    unsigned long *symbolsize,
 			    unsigned long *offset,
 			    char **modname,
@@ -519,7 +519,7 @@ static inline void module_put(struct module *module)
 #define module_name(mod) "kernel"
 
 /* For kallsyms to ask for address resolution.  NULL means not found. */
-static inline char *module_address_lookup(unsigned long addr,
+static inline const char *module_address_lookup(unsigned long addr,
 					  unsigned long *symbolsize,
 					  unsigned long *offset,
 					  char **modname,
diff --git a/kernel/module.c b/kernel/module.c
index 676c023c831b..4202da97a1da 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2253,7 +2253,7 @@ static const char *get_ksymbol(struct module *mod,
 
 /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
  * not to lock to avoid deadlock on oopses, simply disable preemption. */
-char *module_address_lookup(unsigned long addr,
+const char *module_address_lookup(unsigned long addr,
 			    unsigned long *size,
 			    unsigned long *offset,
 			    char **modname,
@@ -2278,7 +2278,7 @@ char *module_address_lookup(unsigned long addr,
 		ret = namebuf;
 	}
 	preempt_enable();
-	return (char *)ret;
+	return ret;
 }
 
 int lookup_module_symbol_name(unsigned long addr, char *symname)
-- 
cgit 


From ed2ddbf88c0ddeeae4c78bb306a116dfd867c55c Mon Sep 17 00:00:00 2001
From: Pierre Peiffer <pierre.peiffer@bull.net>
Date: Fri, 8 Feb 2008 04:18:57 -0800
Subject: IPC: make struct ipc_ids static in ipc_namespace

Each ipc_namespace contains a table of 3 pointers to struct ipc_ids (3 for
msg, sem and shm, structure used to store all ipcs) These 'struct ipc_ids'
are dynamically allocated for each icp_namespace as the ipc_namespace
itself (for the init namespace, they are initialized with pointers to
static variables instead)

It is so for historical reason: in fact, before the use of idr to store the
ipcs, the ipcs were stored in tables of variable length, depending of the
maximum number of ipc allowed.  Now, these 'struct ipc_ids' have a fixed
size.  As they are allocated in any cases for each new ipc_namespace, there
is no gain of memory in having them allocated separately of the struct
ipc_namespace.

This patch proposes to make this table static in the struct ipc_namespace.
Thus, we can allocate all in once and get rid of all the code needed to
allocate and free these ipc_ids separately.

Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Acked-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 13 +++++++++++--
 ipc/msg.c                     | 26 ++++----------------------
 ipc/namespace.c               | 25 ++++---------------------
 ipc/sem.c                     | 26 ++++----------------------
 ipc/shm.c                     | 26 ++++----------------------
 ipc/util.c                    |  6 +++---
 ipc/util.h                    | 16 ++++------------
 7 files changed, 34 insertions(+), 104 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index a491fc9dd231..6db5522eef51 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -2,11 +2,20 @@
 #define __IPC_NAMESPACE_H__
 
 #include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/rwsem.h>
+
+struct ipc_ids {
+	int in_use;
+	unsigned short seq;
+	unsigned short seq_max;
+	struct rw_semaphore rw_mutex;
+	struct idr ipcs_idr;
+};
 
-struct ipc_ids;
 struct ipc_namespace {
 	struct kref	kref;
-	struct ipc_ids	*ids[3];
+	struct ipc_ids	ids[3];
 
 	int		sem_ctls[4];
 	int		used_sems;
diff --git a/ipc/msg.c b/ipc/msg.c
index 5879bfeb79ca..ab0c38b29533 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -67,9 +67,7 @@ struct msg_sender {
 #define SEARCH_NOTEQUAL		3
 #define SEARCH_LESSEQUAL	4
 
-static struct ipc_ids init_msg_ids;
-
-#define msg_ids(ns)	(*((ns)->ids[IPC_MSG_IDS]))
+#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
 
 #define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
 #define msg_buildid(id, seq)	ipc_buildid(id, seq)
@@ -80,30 +78,17 @@ static int newque(struct ipc_namespace *, struct ipc_params *);
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
 
-static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+void msg_init_ns(struct ipc_namespace *ns)
 {
-	ns->ids[IPC_MSG_IDS] = ids;
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 	atomic_set(&ns->msg_bytes, 0);
 	atomic_set(&ns->msg_hdrs, 0);
-	ipc_init_ids(ids);
+	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
 }
 
 #ifdef CONFIG_IPC_NS
-int msg_init_ns(struct ipc_namespace *ns)
-{
-	struct ipc_ids *ids;
-
-	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
-	if (ids == NULL)
-		return -ENOMEM;
-
-	__msg_init_ns(ns, ids);
-	return 0;
-}
-
 void msg_exit_ns(struct ipc_namespace *ns)
 {
 	struct msg_queue *msq;
@@ -126,15 +111,12 @@ void msg_exit_ns(struct ipc_namespace *ns)
 	}
 
 	up_write(&msg_ids(ns).rw_mutex);
-
-	kfree(ns->ids[IPC_MSG_IDS]);
-	ns->ids[IPC_MSG_IDS] = NULL;
 }
 #endif
 
 void __init msg_init(void)
 {
-	__msg_init_ns(&init_ipc_ns, &init_msg_ids);
+	msg_init_ns(&init_ipc_ns);
 	ipc_init_proc_interface("sysvipc/msg",
 				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
 				IPC_MSG_IDS, sysvipc_msg_proc_show);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index cef1139e6c96..1fed8922d475 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -14,35 +14,18 @@
 
 static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
 {
-	int err;
 	struct ipc_namespace *ns;
 
-	err = -ENOMEM;
 	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
 	if (ns == NULL)
-		goto err_mem;
+		return ERR_PTR(-ENOMEM);
 
-	err = sem_init_ns(ns);
-	if (err)
-		goto err_sem;
-	err = msg_init_ns(ns);
-	if (err)
-		goto err_msg;
-	err = shm_init_ns(ns);
-	if (err)
-		goto err_shm;
+	sem_init_ns(ns);
+	msg_init_ns(ns);
+	shm_init_ns(ns);
 
 	kref_init(&ns->kref);
 	return ns;
-
-err_shm:
-	msg_exit_ns(ns);
-err_msg:
-	sem_exit_ns(ns);
-err_sem:
-	kfree(ns);
-err_mem:
-	return ERR_PTR(err);
 }
 
 struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
diff --git a/ipc/sem.c b/ipc/sem.c
index 7836edb0cf96..1f7e28d1d25d 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -87,14 +87,12 @@
 #include <asm/uaccess.h>
 #include "util.h"
 
-#define sem_ids(ns)	(*((ns)->ids[IPC_SEM_IDS]))
+#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])
 
 #define sem_unlock(sma)		ipc_unlock(&(sma)->sem_perm)
 #define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
 #define sem_buildid(id, seq)	ipc_buildid(id, seq)
 
-static struct ipc_ids init_sem_ids;
-
 static int newary(struct ipc_namespace *, struct ipc_params *);
 static void freeary(struct ipc_namespace *, struct sem_array *);
 #ifdef CONFIG_PROC_FS
@@ -118,30 +116,17 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #define sc_semopm	sem_ctls[2]
 #define sc_semmni	sem_ctls[3]
 
-static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+void sem_init_ns(struct ipc_namespace *ns)
 {
-	ns->ids[IPC_SEM_IDS] = ids;
 	ns->sc_semmsl = SEMMSL;
 	ns->sc_semmns = SEMMNS;
 	ns->sc_semopm = SEMOPM;
 	ns->sc_semmni = SEMMNI;
 	ns->used_sems = 0;
-	ipc_init_ids(ids);
+	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
 }
 
 #ifdef CONFIG_IPC_NS
-int sem_init_ns(struct ipc_namespace *ns)
-{
-	struct ipc_ids *ids;
-
-	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
-	if (ids == NULL)
-		return -ENOMEM;
-
-	__sem_init_ns(ns, ids);
-	return 0;
-}
-
 void sem_exit_ns(struct ipc_namespace *ns)
 {
 	struct sem_array *sma;
@@ -163,15 +148,12 @@ void sem_exit_ns(struct ipc_namespace *ns)
 		total++;
 	}
 	up_write(&sem_ids(ns).rw_mutex);
-
-	kfree(ns->ids[IPC_SEM_IDS]);
-	ns->ids[IPC_SEM_IDS] = NULL;
 }
 #endif
 
 void __init sem_init (void)
 {
-	__sem_init_ns(&init_ipc_ns, &init_sem_ids);
+	sem_init_ns(&init_ipc_ns);
 	ipc_init_proc_interface("sysvipc/sem",
 				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
 				IPC_SEM_IDS, sysvipc_sem_proc_show);
diff --git a/ipc/shm.c b/ipc/shm.c
index 07f4b7abc80a..fe92471e19c7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -56,9 +56,7 @@ struct shm_file_data {
 static const struct file_operations shm_file_operations;
 static struct vm_operations_struct shm_vm_ops;
 
-static struct ipc_ids init_shm_ids;
-
-#define shm_ids(ns)	(*((ns)->ids[IPC_SHM_IDS]))
+#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])
 
 #define shm_unlock(shp)			\
 	ipc_unlock(&(shp)->shm_perm)
@@ -72,14 +70,13 @@ static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
 
-static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+void shm_init_ns(struct ipc_namespace *ns)
 {
-	ns->ids[IPC_SHM_IDS] = ids;
 	ns->shm_ctlmax = SHMMAX;
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
 	ns->shm_tot = 0;
-	ipc_init_ids(ids);
+	ipc_init_ids(&ns->ids[IPC_SHM_IDS]);
 }
 
 /*
@@ -98,18 +95,6 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 }
 
 #ifdef CONFIG_IPC_NS
-int shm_init_ns(struct ipc_namespace *ns)
-{
-	struct ipc_ids *ids;
-
-	ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
-	if (ids == NULL)
-		return -ENOMEM;
-
-	__shm_init_ns(ns, ids);
-	return 0;
-}
-
 void shm_exit_ns(struct ipc_namespace *ns)
 {
 	struct shmid_kernel *shp;
@@ -131,15 +116,12 @@ void shm_exit_ns(struct ipc_namespace *ns)
 		total++;
 	}
 	up_write(&shm_ids(ns).rw_mutex);
-
-	kfree(ns->ids[IPC_SHM_IDS]);
-	ns->ids[IPC_SHM_IDS] = NULL;
 }
 #endif
 
 void __init shm_init (void)
 {
-	__shm_init_ns(&init_ipc_ns, &init_shm_ids);
+	shm_init_ns(&init_ipc_ns);
 	ipc_init_proc_interface("sysvipc/shm",
 				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n",
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
diff --git a/ipc/util.c b/ipc/util.c
index 910db7748199..fd1b50da9db8 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -833,7 +833,7 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
 	if (ipc && ipc != SEQ_START_TOKEN)
 		ipc_unlock(ipc);
 
-	return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos);
+	return sysvipc_find_ipc(&iter->ns->ids[iface->ids], *pos, pos);
 }
 
 /*
@@ -846,7 +846,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 	struct ipc_proc_iface *iface = iter->iface;
 	struct ipc_ids *ids;
 
-	ids = iter->ns->ids[iface->ids];
+	ids = &iter->ns->ids[iface->ids];
 
 	/*
 	 * Take the lock - this will be released by the corresponding
@@ -877,7 +877,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
 	if (ipc && ipc != SEQ_START_TOKEN)
 		ipc_unlock(ipc);
 
-	ids = iter->ns->ids[iface->ids];
+	ids = &iter->ns->ids[iface->ids];
 	/* Release the lock we took in start() */
 	up_read(&ids->rw_mutex);
 }
diff --git a/ipc/util.h b/ipc/util.h
index ca245fae2f98..f37d160c98fe 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -10,7 +10,6 @@
 #ifndef _IPC_UTIL_H
 #define _IPC_UTIL_H
 
-#include <linux/idr.h>
 #include <linux/err.h>
 
 #define USHRT_MAX 0xffff
@@ -22,22 +21,14 @@ void shm_init (void);
 
 struct ipc_namespace;
 
-int sem_init_ns(struct ipc_namespace *ns);
-int msg_init_ns(struct ipc_namespace *ns);
-int shm_init_ns(struct ipc_namespace *ns);
+void sem_init_ns(struct ipc_namespace *ns);
+void msg_init_ns(struct ipc_namespace *ns);
+void shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
 void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 
-struct ipc_ids {
-	int in_use;
-	unsigned short seq;
-	unsigned short seq_max;
-	struct rw_semaphore rw_mutex;
-	struct idr ipcs_idr;
-};
-
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
@@ -68,6 +59,7 @@ struct ipc_ops {
 };
 
 struct seq_file;
+struct ipc_ids;
 
 void ipc_init_ids(struct ipc_ids *);
 #ifdef CONFIG_PROC_FS
-- 
cgit 


From 01b8b07a5d77d22e609267dcae74d15e3e9c5f13 Mon Sep 17 00:00:00 2001
From: Pierre Peiffer <pierre.peiffer@bull.net>
Date: Fri, 8 Feb 2008 04:18:57 -0800
Subject: IPC: consolidate sem_exit_ns(), msg_exit_ns() and shm_exit_ns()

sem_exit_ns(), msg_exit_ns() and shm_exit_ns() are all called when an
ipc_namespace is released to free all ipcs of each type.  But in fact, they
do the same thing: they loop around all ipcs to free them individually by
calling a specific routine.

This patch proposes to consolidate this by introducing a common function,
free_ipcs(), that do the job.  The specific routine to call on each
individual ipcs is passed as parameter.  For this, these ipc-specific
'free' routines are reworked to take a generic 'struct ipc_perm' as
parameter.

Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  5 ++++-
 ipc/msg.c                     | 28 +++++-----------------------
 ipc/namespace.c               | 30 ++++++++++++++++++++++++++++++
 ipc/sem.c                     | 27 +++++----------------------
 ipc/shm.c                     | 27 ++++++---------------------
 5 files changed, 50 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6db5522eef51..e4451d1da753 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -43,7 +43,10 @@ extern struct ipc_namespace init_ipc_ns;
 #if defined(CONFIG_SYSVIPC) && defined(CONFIG_IPC_NS)
 extern void free_ipc_ns(struct kref *kref);
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
-						struct ipc_namespace *ns);
+				       struct ipc_namespace *ns);
+extern void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
+		      void (*free)(struct ipc_namespace *,
+				   struct kern_ipc_perm *));
 
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 {
diff --git a/ipc/msg.c b/ipc/msg.c
index ab0c38b29533..46585a05473e 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -72,7 +72,7 @@ struct msg_sender {
 #define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
 #define msg_buildid(id, seq)	ipc_buildid(id, seq)
 
-static void freeque(struct ipc_namespace *, struct msg_queue *);
+static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
 static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
@@ -91,26 +91,7 @@ void msg_init_ns(struct ipc_namespace *ns)
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
-	struct msg_queue *msq;
-	struct kern_ipc_perm *perm;
-	int next_id;
-	int total, in_use;
-
-	down_write(&msg_ids(ns).rw_mutex);
-
-	in_use = msg_ids(ns).in_use;
-
-	for (total = 0, next_id = 0; total < in_use; next_id++) {
-		perm = idr_find(&msg_ids(ns).ipcs_idr, next_id);
-		if (perm == NULL)
-			continue;
-		ipc_lock_by_ptr(perm);
-		msq = container_of(perm, struct msg_queue, q_perm);
-		freeque(ns, msq);
-		total++;
-	}
-
-	up_write(&msg_ids(ns).rw_mutex);
+	free_ipcs(ns, &msg_ids(ns), freeque);
 }
 #endif
 
@@ -274,9 +255,10 @@ static void expunge_all(struct msg_queue *msq, int res)
  * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
  * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
  */
-static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
+static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
 	struct list_head *tmp;
+	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
@@ -582,7 +564,7 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		break;
 	}
 	case IPC_RMID:
-		freeque(ns, msq);
+		freeque(ns, &msq->q_perm);
 		break;
 	}
 	err = 0;
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 1fed8922d475..1b967655eb35 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -44,6 +44,36 @@ struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
 	return new_ns;
 }
 
+/*
+ * free_ipcs - free all ipcs of one type
+ * @ns:   the namespace to remove the ipcs from
+ * @ids:  the table of ipcs to free
+ * @free: the function called to free each individual ipc
+ *
+ * Called for each kind of ipc when an ipc_namespace exits.
+ */
+void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
+	       void (*free)(struct ipc_namespace *, struct kern_ipc_perm *))
+{
+	struct kern_ipc_perm *perm;
+	int next_id;
+	int total, in_use;
+
+	down_write(&ids->rw_mutex);
+
+	in_use = ids->in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		perm = idr_find(&ids->ipcs_idr, next_id);
+		if (perm == NULL)
+			continue;
+		ipc_lock_by_ptr(perm);
+		free(ns, perm);
+		total++;
+	}
+	up_write(&ids->rw_mutex);
+}
+
 void free_ipc_ns(struct kref *kref)
 {
 	struct ipc_namespace *ns;
diff --git a/ipc/sem.c b/ipc/sem.c
index 1f7e28d1d25d..0b45a4d383c6 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -94,7 +94,7 @@
 #define sem_buildid(id, seq)	ipc_buildid(id, seq)
 
 static int newary(struct ipc_namespace *, struct ipc_params *);
-static void freeary(struct ipc_namespace *, struct sem_array *);
+static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif
@@ -129,25 +129,7 @@ void sem_init_ns(struct ipc_namespace *ns)
 #ifdef CONFIG_IPC_NS
 void sem_exit_ns(struct ipc_namespace *ns)
 {
-	struct sem_array *sma;
-	struct kern_ipc_perm *perm;
-	int next_id;
-	int total, in_use;
-
-	down_write(&sem_ids(ns).rw_mutex);
-
-	in_use = sem_ids(ns).in_use;
-
-	for (total = 0, next_id = 0; total < in_use; next_id++) {
-		perm = idr_find(&sem_ids(ns).ipcs_idr, next_id);
-		if (perm == NULL)
-			continue;
-		ipc_lock_by_ptr(perm);
-		sma = container_of(perm, struct sem_array, sem_perm);
-		freeary(ns, sma);
-		total++;
-	}
-	up_write(&sem_ids(ns).rw_mutex);
+	free_ipcs(ns, &sem_ids(ns), freeary);
 }
 #endif
 
@@ -542,10 +524,11 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
  * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
  * remains locked on exit.
  */
-static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
+static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
 	struct sem_undo *un;
 	struct sem_queue *q;
+	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 
 	/* Invalidate the existing undo structures for this semaphore set.
 	 * (They will be freed without any further action in exit_sem()
@@ -926,7 +909,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
 
 	switch(cmd){
 	case IPC_RMID:
-		freeary(ns, sma);
+		freeary(ns, ipcp);
 		err = 0;
 		break;
 	case IPC_SET:
diff --git a/ipc/shm.c b/ipc/shm.c
index fe92471e19c7..c47e87278a92 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -83,8 +83,11 @@ void shm_init_ns(struct ipc_namespace *ns)
  * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
  * Only shm_ids.rw_mutex remains locked on exit.
  */
-static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
+static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
+	struct shmid_kernel *shp;
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+
 	if (shp->shm_nattch){
 		shp->shm_perm.mode |= SHM_DEST;
 		/* Do not find it any more */
@@ -97,25 +100,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 #ifdef CONFIG_IPC_NS
 void shm_exit_ns(struct ipc_namespace *ns)
 {
-	struct shmid_kernel *shp;
-	struct kern_ipc_perm *perm;
-	int next_id;
-	int total, in_use;
-
-	down_write(&shm_ids(ns).rw_mutex);
-
-	in_use = shm_ids(ns).in_use;
-
-	for (total = 0, next_id = 0; total < in_use; next_id++) {
-		perm = idr_find(&shm_ids(ns).ipcs_idr, next_id);
-		if (perm == NULL)
-			continue;
-		ipc_lock_by_ptr(perm);
-		shp = container_of(perm, struct shmid_kernel, shm_perm);
-		do_shm_rmid(ns, shp);
-		total++;
-	}
-	up_write(&shm_ids(ns).rw_mutex);
+	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
 }
 #endif
 
@@ -832,7 +817,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		if (err)
 			goto out_unlock_up;
 
-		do_shm_rmid(ns, shp);
+		do_shm_rmid(ns, &shp->shm_perm);
 		up_write(&shm_ids(ns).rw_mutex);
 		goto out;
 	}
-- 
cgit 


From 6b39c7bfbd1436836c0fb34c5b437fda1a7a3dd4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Feb 2008 04:18:58 -0800
Subject: kill PT_ATTACHED

Since the patch

	"Fix ptrace_attach()/ptrace_traceme()/de_thread() race"
	commit f5b40e363ad6041a96e3da32281d8faa191597b9

we set PT_ATTACHED and change child->parent "atomically" wrt task_list lock.

This means we can remove the checks like "PT_ATTACHED && ->parent != ptracer"
which were needed to catch the "ptrace attach is in progress" case.  We can
also remove the flag itself since nobody else uses it.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h |  1 -
 kernel/exit.c          | 13 +------------
 kernel/ptrace.c        |  6 ++----
 kernel/signal.c        |  5 -----
 4 files changed, 3 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 6ab80714a916..ebe0c17039cf 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -67,7 +67,6 @@
 #define PT_TRACE_EXEC	0x00000080
 #define PT_TRACE_VFORK_DONE	0x00000100
 #define PT_TRACE_EXIT	0x00000200
-#define PT_ATTACHED	0x00000400	/* parent != real_parent */
 
 #define PT_TRACE_MASK	0x000003f4
 
diff --git a/kernel/exit.c b/kernel/exit.c
index eb9934a82fc1..1f2c15297f2d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1514,18 +1514,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 
 static inline int my_ptrace_child(struct task_struct *p)
 {
-	if (!(p->ptrace & PT_PTRACED))
-		return 0;
-	if (!(p->ptrace & PT_ATTACHED))
-		return 1;
-	/*
-	 * This child was PTRACE_ATTACH'd.  We should be seeing it only if
-	 * we are the attacher.  If we are the real parent, this is a race
-	 * inside ptrace_attach.  It is waiting for the tasklist_lock,
-	 * which we have to switch the parent links, but has already set
-	 * the flags in p->ptrace.
-	 */
-	return (p->parent != p->real_parent);
+	return p->ptrace & PT_PTRACED;
 }
 
 static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 628b03ab88a5..2dc00298e678 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -100,8 +100,7 @@ int ptrace_check_attach(struct task_struct *child, int kill)
 	 */
 	read_lock(&tasklist_lock);
 	if ((child->ptrace & PT_PTRACED) && child->parent == current &&
-	    (!(child->ptrace & PT_ATTACHED) || child->real_parent != current)
-	    && child->signal != NULL) {
+	     child->signal != NULL) {
 		ret = 0;
 		spin_lock_irq(&child->sighand->siglock);
 		if (task_is_stopped(child))
@@ -200,8 +199,7 @@ repeat:
 		goto bad;
 
 	/* Go */
-	task->ptrace |= PT_PTRACED | ((task->real_parent != current)
-				      ? PT_ATTACHED : 0);
+	task->ptrace |= PT_PTRACED;
 	if (capable(CAP_SYS_PTRACE))
 		task->ptrace |= PT_PTRACE_CAP;
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 5d30ff561847..6d6d1ab39e7e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1556,11 +1556,6 @@ static inline int may_ptrace_stop(void)
 {
 	if (!likely(current->ptrace & PT_PTRACED))
 		return 0;
-
-	if (unlikely(current->parent == current->real_parent &&
-		    (current->ptrace & PT_ATTACHED)))
-		return 0;
-
 	/*
 	 * Are we in the middle of do_coredump?
 	 * If so and our tracer is also part of the coredump stopping
-- 
cgit 


From 8520d7c7f8611216e3b270becec95bb35b6899d4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Feb 2008 04:19:09 -0800
Subject: teach set_special_pids() to use struct pid

Change set_special_pids() to work with struct pid, not pid_t from global name
space. This again speedups and imho cleanups the code, also a preparation for
the next patch.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 +-
 init/main.c           |  2 +-
 kernel/exit.c         | 30 +++++++++++++++---------------
 kernel/sys.c          |  2 +-
 4 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8a4812c1c038..d1c9b7f1d51e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1632,7 +1632,7 @@ extern struct task_struct *find_task_by_vpid(pid_t nr);
 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 		struct pid_namespace *ns);
 
-extern void __set_special_pids(pid_t session, pid_t pgrp);
+extern void __set_special_pids(struct pid *pid);
 
 /* per-UID process charging. */
 extern struct user_struct * alloc_uid(struct user_namespace *, uid_t);
diff --git a/init/main.c b/init/main.c
index 2a78932f6c07..9a5b18c0a63d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -833,7 +833,7 @@ static int __init kernel_init(void * unused)
 	 */
 	init_pid_ns.child_reaper = current;
 
-	__set_special_pids(1, 1);
+	__set_special_pids(task_pid(current));
 	cad_pid = task_pid(current);
 
 	smp_prepare_cpus(setup_max_cpus);
diff --git a/kernel/exit.c b/kernel/exit.c
index 42a8713b2050..96716fd22373 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -293,26 +293,27 @@ static void reparent_to_kthreadd(void)
 	switch_uid(INIT_USER);
 }
 
-void __set_special_pids(pid_t session, pid_t pgrp)
+void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
+	pid_t nr = pid_nr(pid);
 
-	if (task_session_nr(curr) != session) {
+	if (task_session(curr) != pid) {
 		detach_pid(curr, PIDTYPE_SID);
-		set_task_session(curr, session);
-		attach_pid(curr, PIDTYPE_SID, find_pid(session));
+		attach_pid(curr, PIDTYPE_SID, pid);
+		set_task_session(curr, nr);
 	}
-	if (task_pgrp_nr(curr) != pgrp) {
+	if (task_pgrp(curr) != pid) {
 		detach_pid(curr, PIDTYPE_PGID);
-		set_task_pgrp(curr, pgrp);
-		attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp));
+		attach_pid(curr, PIDTYPE_PGID, pid);
+		set_task_pgrp(curr, nr);
 	}
 }
 
-static void set_special_pids(pid_t session, pid_t pgrp)
+static void set_special_pids(struct pid *pid)
 {
 	write_lock_irq(&tasklist_lock);
-	__set_special_pids(session, pgrp);
+	__set_special_pids(pid);
 	write_unlock_irq(&tasklist_lock);
 }
 
@@ -383,7 +384,11 @@ void daemonize(const char *name, ...)
 	 */
 	current->flags |= PF_NOFREEZE;
 
-	set_special_pids(1, 1);
+	if (current->nsproxy != &init_nsproxy) {
+		get_nsproxy(&init_nsproxy);
+		switch_task_namespaces(current, &init_nsproxy);
+	}
+	set_special_pids(find_pid(1));
 	proc_clear_tty(current);
 
 	/* Block and flush all signals */
@@ -398,11 +403,6 @@ void daemonize(const char *name, ...)
 	current->fs = fs;
 	atomic_inc(&fs->count);
 
-	if (current->nsproxy != init_task.nsproxy) {
-		get_nsproxy(init_task.nsproxy);
-		switch_task_namespaces(current, init_task.nsproxy);
-	}
-
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
diff --git a/kernel/sys.c b/kernel/sys.c
index e13bc518b444..c326d6dceee3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1065,7 +1065,7 @@ asmlinkage long sys_setsid(void)
 		goto out;
 
 	group_leader->signal->leader = 1;
-	__set_special_pids(pid_nr(sid), pid_nr(sid));
+	__set_special_pids(sid);
 
 	spin_lock(&group_leader->sighand->siglock);
 	group_leader->signal->tty = NULL;
-- 
cgit 


From d12619b5ff5664623524aef796514d1946ea3b4a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Feb 2008 04:19:12 -0800
Subject: fix group stop with exit race

do_signal_stop() counts all sub-thread and sets ->group_stop_count
accordingly.  Every thread should decrement ->group_stop_count and stop,
the last one should notify the parent.

However a sub-thread can exit before it notices the signal_pending(), or it
may be somewhere in do_exit() already.  In that case the group stop never
finishes properly.

Note: this is a minimal fix, we can add some optimizations later.  Say we
can return quickly if thread_group_empty().  Also, we can move some signal
related code from exit_notify() to exit_signals().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h |  1 +
 kernel/exit.c          |  2 +-
 kernel/signal.c        | 27 ++++++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 7e095147656c..42d2e0a948f4 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -241,6 +241,7 @@ extern int show_unhandled_signals;
 
 struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
+extern void exit_signals(struct task_struct *tsk);
 
 extern struct kmem_cache *sighand_cachep;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index d7815f570882..8f3bf53a5b4d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -947,7 +947,7 @@ fastcall NORET_TYPE void do_exit(long code)
 		schedule();
 	}
 
-	tsk->flags |= PF_EXITING;
+	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
 	 * an exiting task cleaning up the robust pi futexes.
diff --git a/kernel/signal.c b/kernel/signal.c
index 3fca710a5cd7..209eec11eef5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1739,7 +1739,7 @@ static int do_signal_stop(int signr)
 			 * stop is always done with the siglock held,
 			 * so this check has no races.
 			 */
-			if (!t->exit_state &&
+			if (!(t->flags & PF_EXITING) &&
 			    !task_is_stopped_or_traced(t)) {
 				stop_count++;
 				signal_wake_up(t, 0);
@@ -1900,6 +1900,31 @@ relock:
 	return signr;
 }
 
+void exit_signals(struct task_struct *tsk)
+{
+	int group_stop = 0;
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (unlikely(tsk->signal->group_stop_count) &&
+			!--tsk->signal->group_stop_count) {
+		tsk->signal->flags = SIGNAL_STOP_STOPPED;
+		group_stop = 1;
+	}
+
+	/*
+	 * From now this task is not visible for group-wide signals,
+	 * see wants_signal(), do_signal_stop().
+	 */
+	tsk->flags |= PF_EXITING;
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	if (unlikely(group_stop)) {
+		read_lock(&tasklist_lock);
+		do_notify_parent_cldstop(tsk, CLD_STOPPED);
+		read_unlock(&tasklist_lock);
+	}
+}
+
 EXPORT_SYMBOL(recalc_sigpending);
 EXPORT_SYMBOL_GPL(dequeue_signal);
 EXPORT_SYMBOL(flush_signals);
-- 
cgit 


From 44c4e1b2581f7273ab14ef30b6430618801c57b1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 Feb 2008 04:19:15 -0800
Subject: pid: Extend/Fix pid_vnr

pid_vnr returns the user space pid with respect to the pid namespace the
struct pid was allocated in.  What we want before we return a pid to user
space is the user space pid with respect to the pid namespace of current.

pid_vnr is a very nice optimization but because it isn't quite what we want
it is easy to use pid_vnr at times when we aren't certain the struct pid
was allocated in our pid namespace.

Currently this describes at least tiocgpgrp and tiocgsid in ttyio.c the
parent process reported in the core dumps and the parent process in
get_signal_to_deliver.

So unless the performance impact is huge having an interface that does what
we want instead of always what we want should be much more reliable and
much less error prone.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h   | 14 +++-----------
 include/linux/sched.h |  5 ++---
 kernel/pid.c          |  6 ++++++
 3 files changed, 11 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 061abb6c0796..b91f4732dc1b 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -127,9 +127,8 @@ extern void FASTCALL(free_pid(struct pid *pid));
  * the helpers to get the pid's id seen from different namespaces
  *
  * pid_nr()    : global id, i.e. the id seen from the init namespace;
- * pid_vnr()   : virtual id, i.e. the id seen from the namespace this pid
- *               belongs to. this only makes sence when called in the
- *               context of the task that belongs to the same namespace;
+ * pid_vnr()   : virtual id, i.e. the id seen from the pid namespace of
+ *               current.
  * pid_nr_ns() : id seen from the ns specified.
  *
  * see also task_xid_nr() etc in include/linux/sched.h
@@ -144,14 +143,7 @@ static inline pid_t pid_nr(struct pid *pid)
 }
 
 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
-
-static inline pid_t pid_vnr(struct pid *pid)
-{
-	pid_t nr = 0;
-	if (pid)
-		nr = pid->numbers[pid->level].nr;
-	return nr;
-}
+pid_t pid_vnr(struct pid *pid);
 
 #define do_each_pid_task(pid, type, task)				\
 	do {								\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d1c9b7f1d51e..3deb6e5d3096 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1332,9 +1332,8 @@ struct pid_namespace;
  * from various namespaces
  *
  * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
- * task_xid_vnr()    : virtual id, i.e. the id seen from the namespace the task
- *                     belongs to. this only makes sence when called in the
- *                     context of the task that belongs to the same namespace;
+ * task_xid_vnr()    : virtual id, i.e. the id seen from the pid namespace of
+ *                     current.
  * task_xid_nr_ns()  : id seen from the ns specified;
  *
  * set_task_vxid()   : assigns a virtual id to a task;
diff --git a/kernel/pid.c b/kernel/pid.c
index 939746fb4ce7..a32859c4a3cd 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -443,6 +443,12 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
 	return nr;
 }
 
+pid_t pid_vnr(struct pid *pid)
+{
+	return pid_nr_ns(pid, current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL_GPL(pid_vnr);
+
 pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
 	return pid_nr_ns(task_pid(tsk), ns);
-- 
cgit 


From 46f382d2b69d2221086b823f0dbc8f32c027cac2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Feb 2008 04:19:19 -0800
Subject: uglify while_each_pid_task() to make sure we don't count the execing
 pricess twice

There is a window when de_thread() switches the leader and drops
tasklist_lock.  In that window do_each_pid_task(PIDTYPE_PID) finds both new
and old leaders.

The problem is pretty much theoretical and probably can be ignored.  Currently
the only users of do_each_pid_task(PIDTYPE_PID) are send_sigio/send_sigurg, so
they can send the signal to the same process twice.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index b91f4732dc1b..f84d532b5d23 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -152,7 +152,13 @@ pid_t pid_vnr(struct pid *pid);
 			hlist_for_each_entry_rcu((task), pos___,	\
 				&pid->tasks[type], pids[type].node) {
 
+			/*
+			 * Both old and new leaders may be attached to
+			 * the same pid in the middle of de_thread().
+			 */
 #define while_each_pid_task(pid, type, task)				\
+				if (type == PIDTYPE_PID)		\
+					break;				\
 			}						\
 	} while (0)
 
-- 
cgit 


From fea9d175545b38cb3e84569400419eb81bc90fa3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Feb 2008 04:19:19 -0800
Subject: ITIMER_REAL: convert to use struct pid

signal_struct->tsk points to the ->group_leader and thus we have the nasty
code in de_thread() which has to change it and restart ->real_timer if the
leader is changed.

Use "struct pid *leader_pid" instead.  This also allows us to kill now
unneeded send_group_sig_info().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Roland McGrath <roland@redhat.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c             | 22 ++--------------------
 include/linux/sched.h |  3 +--
 kernel/fork.c         |  2 +-
 kernel/itimer.c       |  2 +-
 kernel/signal.c       | 14 --------------
 5 files changed, 5 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index be923e4bc389..927a7c5ef4af 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -782,26 +782,8 @@ static int de_thread(struct task_struct *tsk)
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
 
-	/*
-	 * Account for the thread group leader hanging around:
-	 */
-	count = 1;
-	if (!thread_group_leader(tsk)) {
-		count = 2;
-		/*
-		 * The SIGALRM timer survives the exec, but needs to point
-		 * at us as the new group leader now.  We have a race with
-		 * a timer firing now getting the old leader, so we need to
-		 * synchronize with any firing (by calling del_timer_sync)
-		 * before we can safely let the old group leader die.
-		 */
-		sig->tsk = tsk;
-		spin_unlock_irq(lock);
-		if (hrtimer_cancel(&sig->real_timer))
-			hrtimer_restart(&sig->real_timer);
-		spin_lock_irq(lock);
-	}
-
+	/* Account for the thread group leader hanging around: */
+	count = thread_group_leader(tsk) ? 1 : 2;
 	sig->notify_count = count;
 	while (atomic_read(&sig->count) > count) {
 		__set_current_state(TASK_UNINTERRUPTIBLE);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3deb6e5d3096..b2d161d87db7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -460,7 +460,7 @@ struct signal_struct {
 
 	/* ITIMER_REAL timer for the process */
 	struct hrtimer real_timer;
-	struct task_struct *tsk;
+	struct pid *leader_pid;
 	ktime_t it_real_incr;
 
 	/* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */
@@ -1686,7 +1686,6 @@ extern void block_all_signals(int (*notifier)(void *priv), void *priv,
 extern void unblock_all_signals(void);
 extern void release_task(struct task_struct * p);
 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int send_group_sig_info(int, struct siginfo *, struct task_struct *);
 extern int force_sigsegv(int, struct task_struct *);
 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
diff --git a/kernel/fork.c b/kernel/fork.c
index b2ef8e4fad70..ca54d9704644 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -909,7 +909,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
-	sig->tsk = tsk;
 
 	sig->it_virt_expires = cputime_zero;
 	sig->it_virt_incr = cputime_zero;
@@ -1338,6 +1337,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			if (clone_flags & CLONE_NEWPID)
 				p->nsproxy->pid_ns->child_reaper = p;
 
+			p->signal->leader_pid = pid;
 			p->signal->tty = current->signal->tty;
 			set_task_pgrp(p, task_pgrp_nr(current));
 			set_task_session(p, task_session_nr(current));
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 2fab344dbf56..ab982747d9bd 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -132,7 +132,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 	struct signal_struct *sig =
 		container_of(timer, struct signal_struct, real_timer);
 
-	send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk);
+	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index b0b43a4ad8dd..cc45a6b69134 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1205,20 +1205,6 @@ send_sig(int sig, struct task_struct *p, int priv)
 	return send_sig_info(sig, __si_special(priv), p);
 }
 
-/*
- * This is the entry point for "process-wide" signals.
- * They will go to an appropriate thread in the thread group.
- */
-int
-send_group_sig_info(int sig, struct siginfo *info, struct task_struct *p)
-{
-	int ret;
-	read_lock(&tasklist_lock);
-	ret = group_send_sig_info(sig, info, p);
-	read_unlock(&tasklist_lock);
-	return ret;
-}
-
 void
 force_sig(int sig, struct task_struct *p)
 {
-- 
cgit 


From 146a505d498c36de98ec161d791dd50beca7f9a3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 8 Feb 2008 04:19:22 -0800
Subject: Get rid of the kill_pgrp_info() function

There's only one caller left - the kill_pgrp one - so merge these two
functions and forget the kill_pgrp_info one.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Reviewed-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 -
 kernel/signal.c       | 21 ++++++++-------------
 2 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b2d161d87db7..00e144117326 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1689,7 +1689,6 @@ extern int send_sig_info(int, struct siginfo *, struct task_struct *);
 extern int force_sigsegv(int, struct task_struct *);
 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
-extern int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
 extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_t, u32);
 extern int kill_pgrp(struct pid *pid, int sig, int priv);
diff --git a/kernel/signal.c b/kernel/signal.c
index a805c7417cd0..2c1f08defac2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1018,7 +1018,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 }
 
 /*
- * kill_pgrp_info() sends a signal to a process group: this is what the tty
+ * __kill_pgrp_info() sends a signal to a process group: this is what the tty
  * control characters do (^C, ^Z etc)
  */
 
@@ -1037,17 +1037,6 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
 	return success ? 0 : retval;
 }
 
-int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
-{
-	int retval;
-
-	read_lock(&tasklist_lock);
-	retval = __kill_pgrp_info(sig, info, pgrp);
-	read_unlock(&tasklist_lock);
-
-	return retval;
-}
-
 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 {
 	int error = -ESRCH;
@@ -1236,7 +1225,13 @@ force_sigsegv(int sig, struct task_struct *p)
 
 int kill_pgrp(struct pid *pid, int sig, int priv)
 {
-	return kill_pgrp_info(sig, __si_special(priv), pid);
+	int ret;
+
+	read_lock(&tasklist_lock);
+	ret = __kill_pgrp_info(sig, __si_special(priv), pid);
+	read_unlock(&tasklist_lock);
+
+	return ret;
 }
 EXPORT_SYMBOL(kill_pgrp);
 
-- 
cgit 


From cf4fc6cb76e50b01666e28a9f4b2e6fbcbb96d5f Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 8 Feb 2008 04:19:24 -0800
Subject: timekeeping: rename timekeeping_is_continuous to
 timekeeping_valid_for_hres

Function timekeeping_is_continuous() no longer checks flag
CLOCK_IS_CONTINUOUS, and it checks CLOCK_SOURCE_VALID_FOR_HRES now.  So rename
the function accordingly.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/time.h      | 2 +-
 kernel/time/tick-sched.c  | 2 +-
 kernel/time/timekeeping.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index ceaab9fff155..2091a19f1655 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -120,7 +120,7 @@ extern void getboottime(struct timespec *ts);
 extern void monotonic_to_bootbased(struct timespec *ts);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
-extern int timekeeping_is_continuous(void);
+extern int timekeeping_valid_for_hres(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 88267f0a8471..fa9bb73dbdb4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -681,7 +681,7 @@ int tick_check_oneshot_change(int allow_nohz)
 	if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
 		return 0;
 
-	if (!timekeeping_is_continuous() || !tick_is_oneshot_available())
+	if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
 		return 0;
 
 	if (!allow_nohz)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cd5dbc4579c9..4f2637eed0f6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -201,9 +201,9 @@ static inline s64 __get_nsec_offset(void) { return 0; }
 #endif
 
 /**
- * timekeeping_is_continuous - check to see if timekeeping is free running
+ * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
  */
-int timekeeping_is_continuous(void)
+int timekeeping_valid_for_hres(void)
 {
 	unsigned long seq;
 	int ret;
-- 
cgit 


From 3eb056764dd806bbe84eb604e45e7470feeaafd8 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 8 Feb 2008 04:19:25 -0800
Subject: time: fix typo in comments

Fix typo in comments.

BTW: I have to fix coding style in arch/ia64/kernel/time.c also, otherwise
checkpatch.pl will be complaining.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/time.c   | 14 +++++++-------
 arch/x86/kernel/time_64.c |  2 +-
 include/linux/hrtimer.h   |  2 +-
 include/linux/jiffies.h   |  6 +++---
 kernel/time.c             |  4 ++--
 kernel/time/clockevents.c |  2 +-
 kernel/time/timekeeping.c |  2 +-
 7 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 3ab042720970..17fda5293c67 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -49,13 +49,13 @@ EXPORT_SYMBOL(last_cli_ip);
 #endif
 
 static struct clocksource clocksource_itc = {
-        .name           = "itc",
-        .rating         = 350,
-        .read           = itc_get_cycles,
-        .mask           = CLOCKSOURCE_MASK(64),
-        .mult           = 0, /*to be caluclated*/
-        .shift          = 16,
-        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+	.name           = "itc",
+	.rating         = 350,
+	.read           = itc_get_cycles,
+	.mask           = CLOCKSOURCE_MASK(64),
+	.mult           = 0, /*to be calculated*/
+	.shift          = 16,
+	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 static struct clocksource *itc_clocksource;
 
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 0380795121a6..c737849e2ef7 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -77,7 +77,7 @@ unsigned long __init native_calculate_cpu_khz(void)
 		reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
 	}
 	local_irq_save(flags);
-	/* start meauring cycles, incrementing from 0 */
+	/* start measuring cycles, incrementing from 0 */
 	wrmsrl(MSR_K7_PERFCTR0 + i, 0);
 	wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
 	rdtscl(tsc_start);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 203591e23210..600fc3bcf63e 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -78,7 +78,7 @@ enum hrtimer_cb_mode {
  * as otherwise the timer could be removed before the softirq code finishes the
  * the handling of the timer.
  *
- * The HRTIMER_STATE_ENQUEUE bit is always or'ed to the current state to
+ * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state to
  * preserve the HRTIMER_STATE_CALLBACK bit in the above scenario.
  *
  * All state transitions are protected by cpu_base->lock.
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 7ba9e47bf061..e0b5b684d83f 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -42,7 +42,7 @@
 /* LATCH is used in the interval timer and ftape setup. */
 #define LATCH  ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
 
-/* Suppose we want to devide two numbers NOM and DEN: NOM/DEN, the we can
+/* Suppose we want to devide two numbers NOM and DEN: NOM/DEN, then we can
  * improve accuracy by shifting LSH bits, hence calculating:
  *     (NOM << LSH) / DEN
  * This however means trouble for large NOM, because (NOM << LSH) may no
@@ -160,7 +160,7 @@ extern unsigned long preset_lpj;
  * We want to do realistic conversions of time so we need to use the same
  * values the update wall clock code uses as the jiffies size.  This value
  * is: TICK_NSEC (which is defined in timex.h).  This
- * is a constant and is in nanoseconds.  We will used scaled math
+ * is a constant and is in nanoseconds.  We will use scaled math
  * with a set of scales defined here as SEC_JIFFIE_SC,  USEC_JIFFIE_SC and
  * NSEC_JIFFIE_SC.  Note that these defines contain nothing but
  * constants and so are computed at compile time.  SHIFT_HZ (computed in
@@ -204,7 +204,7 @@ extern unsigned long preset_lpj;
  * operator if the result is a long long AND at least one of the
  * operands is cast to long long (usually just prior to the "*" so as
  * not to confuse it into thinking it really has a 64-bit operand,
- * which, buy the way, it can do, but it take more code and at least 2
+ * which, buy the way, it can do, but it takes more code and at least 2
  * mpys).
 
  * We also need to be aware that one second in nanoseconds is only a
diff --git a/kernel/time.c b/kernel/time.c
index 33af3e55570d..3b705ecc3fb7 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -267,7 +267,7 @@ EXPORT_SYMBOL(jiffies_to_usecs);
  *
  * This function should be only used for timestamps returned by
  * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
- * it doesn't handle the better resolution of the later.
+ * it doesn't handle the better resolution of the latter.
  */
 struct timespec timespec_trunc(struct timespec t, unsigned gran)
 {
@@ -315,7 +315,7 @@ EXPORT_SYMBOL_GPL(getnstimeofday);
  * This algorithm was first published by Gauss (I think).
  *
  * WARNING: this function will overflow on 2106-02-07 06:28:16 on
- * machines were long is 32-bit! (However, as time_t is signed, we
+ * machines where long is 32-bit! (However, as time_t is signed, we
  * will already get problems at other places on 2038-01-19 03:14:08)
  */
 unsigned long
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1d327f6db424..3d1e3e1a1971 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -133,7 +133,7 @@ static void clockevents_do_notify(unsigned long reason, void *dev)
 }
 
 /*
- * Called after a notify add to make devices availble which were
+ * Called after a notify add to make devices available which were
  * released from the notifier call.
  */
 static void clockevents_notify_released(void)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4f2637eed0f6..1af9fb050fe2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -364,7 +364,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
 	 * with losing too many ticks, otherwise we would overadjust and
 	 * produce an even larger error.  The smaller the adjustment the
 	 * faster we try to adjust for it, as lost ticks can do less harm
-	 * here.  This is tuned so that an error of about 1 msec is adusted
+	 * here.  This is tuned so that an error of about 1 msec is adjusted
 	 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
 	 */
 	error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ);
-- 
cgit 


From 7fa3031500ec9b0a7460c8c23751799006ffee74 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 Feb 2008 04:19:28 -0800
Subject: aout: suppress A.OUT library support if !CONFIG_ARCH_SUPPORTS_AOUT

Suppress A.OUT library support if CONFIG_ARCH_SUPPORTS_AOUT is not set.

Not all architectures support the A.OUT binfmt, so the ELF binfmt should not
be permitted to go looking for A.OUT libraries to load in such a case.  Not
only that, but under such conditions A.OUT core dumps are not produced either.

To make this work, this patch also does the following:

 (1) Makes the existence of the contents of linux/a.out.h contingent on
     CONFIG_ARCH_SUPPORTS_AOUT.

 (2) Renames dump_thread() to aout_dump_thread() as it's only called by A.OUT
     core dumping code.

 (3) Moves aout_dump_thread() into asm/a.out-core.h and makes it inline.  This
     is then included only where needed.  This means that this bit of arch
     code will be stored in the appropriate A.OUT binfmt module rather than
     the core kernel.

 (4) Drops A.OUT support for Blackfin (according to Mike Frysinger it's not
     needed) and FRV.

This patch depends on the previous patch to move STACK_TOP[_MAX] out of
asm/a.out.h and into asm/processor.h as they're required whether or not A.OUT
format is available.

[jdike@addtoit.com: uml: re-remove accidentally restored code]
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/process.c         | 62 ----------------------------
 arch/arm/kernel/process.c           | 29 --------------
 arch/m68k/kernel/process.c          | 47 ----------------------
 arch/sparc/kernel/process.c         | 32 ---------------
 arch/sparc/kernel/sparc_ksyms.c     |  2 -
 arch/sparc64/kernel/binfmt_aout32.c |  3 +-
 arch/sparc64/kernel/process.c       | 11 -----
 arch/um/kernel/ksyms.c              |  1 -
 arch/um/kernel/process.c            |  4 --
 arch/x86/kernel/process_32.c        | 49 -----------------------
 fs/Kconfig.binfmt                   |  3 +-
 fs/binfmt_aout.c                    |  3 +-
 fs/binfmt_elf.c                     | 34 ++++++++++++----
 fs/exec.c                           |  2 +-
 include/asm-alpha/a.out-core.h      | 80 +++++++++++++++++++++++++++++++++++++
 include/asm-arm/a.out-core.h        | 49 +++++++++++++++++++++++
 include/asm-frv/a.out.h             |  5 ---
 include/asm-generic/Kbuild.asm      |  2 +
 include/asm-m68k/a.out-core.h       | 67 +++++++++++++++++++++++++++++++
 include/asm-sparc/a.out-core.h      | 52 ++++++++++++++++++++++++
 include/asm-sparc64/a.out-core.h    | 31 ++++++++++++++
 include/asm-um/a.out-core.h         | 27 +++++++++++++
 include/asm-x86/a.out-core.h        | 71 ++++++++++++++++++++++++++++++++
 include/linux/a.out.h               | 12 +++++-
 include/linux/kernel.h              |  2 -
 25 files changed, 423 insertions(+), 257 deletions(-)
 create mode 100644 include/asm-alpha/a.out-core.h
 create mode 100644 include/asm-arm/a.out-core.h
 delete mode 100644 include/asm-frv/a.out.h
 create mode 100644 include/asm-m68k/a.out-core.h
 create mode 100644 include/asm-sparc/a.out-core.h
 create mode 100644 include/asm-sparc64/a.out-core.h
 create mode 100644 include/asm-um/a.out-core.h
 create mode 100644 include/asm-x86/a.out-core.h

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 92b61629fe3f..9aed47763787 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -317,68 +317,6 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-/*
- * Fill in the user structure for an ECOFF core dump.
- */
-void
-dump_thread(struct pt_regs * pt, struct user * dump)
-{
-	/* switch stack follows right below pt_regs: */
-	struct switch_stack * sw = ((struct switch_stack *) pt) - 1;
-
-	dump->magic = CMAGIC;
-	dump->start_code  = current->mm->start_code;
-	dump->start_data  = current->mm->start_data;
-	dump->start_stack = rdusp() & ~(PAGE_SIZE - 1);
-	dump->u_tsize = ((current->mm->end_code - dump->start_code)
-			 >> PAGE_SHIFT);
-	dump->u_dsize = ((current->mm->brk + PAGE_SIZE-1 - dump->start_data)
-			 >> PAGE_SHIFT);
-	dump->u_ssize = (current->mm->start_stack - dump->start_stack
-			 + PAGE_SIZE-1) >> PAGE_SHIFT;
-
-	/*
-	 * We store the registers in an order/format that is
-	 * compatible with DEC Unix/OSF/1 as this makes life easier
-	 * for gdb.
-	 */
-	dump->regs[EF_V0]  = pt->r0;
-	dump->regs[EF_T0]  = pt->r1;
-	dump->regs[EF_T1]  = pt->r2;
-	dump->regs[EF_T2]  = pt->r3;
-	dump->regs[EF_T3]  = pt->r4;
-	dump->regs[EF_T4]  = pt->r5;
-	dump->regs[EF_T5]  = pt->r6;
-	dump->regs[EF_T6]  = pt->r7;
-	dump->regs[EF_T7]  = pt->r8;
-	dump->regs[EF_S0]  = sw->r9;
-	dump->regs[EF_S1]  = sw->r10;
-	dump->regs[EF_S2]  = sw->r11;
-	dump->regs[EF_S3]  = sw->r12;
-	dump->regs[EF_S4]  = sw->r13;
-	dump->regs[EF_S5]  = sw->r14;
-	dump->regs[EF_S6]  = sw->r15;
-	dump->regs[EF_A3]  = pt->r19;
-	dump->regs[EF_A4]  = pt->r20;
-	dump->regs[EF_A5]  = pt->r21;
-	dump->regs[EF_T8]  = pt->r22;
-	dump->regs[EF_T9]  = pt->r23;
-	dump->regs[EF_T10] = pt->r24;
-	dump->regs[EF_T11] = pt->r25;
-	dump->regs[EF_RA]  = pt->r26;
-	dump->regs[EF_T12] = pt->r27;
-	dump->regs[EF_AT]  = pt->r28;
-	dump->regs[EF_SP]  = rdusp();
-	dump->regs[EF_PS]  = pt->ps;
-	dump->regs[EF_PC]  = pt->pc;
-	dump->regs[EF_GP]  = pt->gp;
-	dump->regs[EF_A0]  = pt->r16;
-	dump->regs[EF_A1]  = pt->r17;
-	dump->regs[EF_A2]  = pt->r18;
-	memcpy((char *)dump->regs + EF_SIZE, sw->fp, 32 * 8);
-}
-EXPORT_SYMBOL(dump_thread);
-
 /*
  * Fill in the user structure for a ELF core dump.
  */
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 4f1a03124a74..436380a5f4c7 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -367,35 +367,6 @@ int dump_fpu (struct pt_regs *regs, struct user_fp *fp)
 }
 EXPORT_SYMBOL(dump_fpu);
 
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
-	struct task_struct *tsk = current;
-
-	dump->magic = CMAGIC;
-	dump->start_code = tsk->mm->start_code;
-	dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1);
-
-	dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT;
-	dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	dump->u_ssize = 0;
-
-	dump->u_debugreg[0] = tsk->thread.debug.bp[0].address;
-	dump->u_debugreg[1] = tsk->thread.debug.bp[1].address;
-	dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn.arm;
-	dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn.arm;
-	dump->u_debugreg[4] = tsk->thread.debug.nsaved;
-
-	if (dump->start_stack < 0x04000000)
-		dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
-
-	dump->regs = *regs;
-	dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
-}
-EXPORT_SYMBOL(dump_thread);
-
 /*
  * Shuffle the argument into the correct register before calling the
  * thread function.  r1 is the thread argument, r2 is the pointer to
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index f85b928ffac4..5f45567318df 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -315,53 +315,6 @@ int dump_fpu (struct pt_regs *regs, struct user_m68kfp_struct *fpu)
 }
 EXPORT_SYMBOL(dump_fpu);
 
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
-	struct switch_stack *sw;
-
-/* changed the size calculations - should hopefully work better. lbt */
-	dump->magic = CMAGIC;
-	dump->start_code = 0;
-	dump->start_stack = rdusp() & ~(PAGE_SIZE - 1);
-	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
-	dump->u_dsize = ((unsigned long) (current->mm->brk +
-					  (PAGE_SIZE-1))) >> PAGE_SHIFT;
-	dump->u_dsize -= dump->u_tsize;
-	dump->u_ssize = 0;
-
-	if (dump->start_stack < TASK_SIZE)
-		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
-
-	dump->u_ar0 = offsetof(struct user, regs);
-	sw = ((struct switch_stack *)regs) - 1;
-	dump->regs.d1 = regs->d1;
-	dump->regs.d2 = regs->d2;
-	dump->regs.d3 = regs->d3;
-	dump->regs.d4 = regs->d4;
-	dump->regs.d5 = regs->d5;
-	dump->regs.d6 = sw->d6;
-	dump->regs.d7 = sw->d7;
-	dump->regs.a0 = regs->a0;
-	dump->regs.a1 = regs->a1;
-	dump->regs.a2 = regs->a2;
-	dump->regs.a3 = sw->a3;
-	dump->regs.a4 = sw->a4;
-	dump->regs.a5 = sw->a5;
-	dump->regs.a6 = sw->a6;
-	dump->regs.d0 = regs->d0;
-	dump->regs.orig_d0 = regs->orig_d0;
-	dump->regs.stkadj = regs->stkadj;
-	dump->regs.sr = regs->sr;
-	dump->regs.pc = regs->pc;
-	dump->regs.fmtvec = (regs->format << 12) | regs->vector;
-	/* dump floating point stuff */
-	dump->u_fpvalid = dump_fpu (regs, &dump->m68kfp);
-}
-EXPORT_SYMBOL(dump_thread);
-
 /*
  * sys_execve() executes a new program.
  */
diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index 77460e316a03..a248e81caa0e 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -566,38 +566,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	return 0;
 }
 
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
-	unsigned long first_stack_page;
-
-	dump->magic = SUNOS_CORE_MAGIC;
-	dump->len = sizeof(struct user);
-	dump->regs.psr = regs->psr;
-	dump->regs.pc = regs->pc;
-	dump->regs.npc = regs->npc;
-	dump->regs.y = regs->y;
-	/* fuck me plenty */
-	memcpy(&dump->regs.regs[0], &regs->u_regs[1], (sizeof(unsigned long) * 15));
-	dump->uexec = current->thread.core_exec;
-	dump->u_tsize = (((unsigned long) current->mm->end_code) -
-		((unsigned long) current->mm->start_code)) & ~(PAGE_SIZE - 1);
-	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1)));
-	dump->u_dsize -= dump->u_tsize;
-	dump->u_dsize &= ~(PAGE_SIZE - 1);
-	first_stack_page = (regs->u_regs[UREG_FP] & ~(PAGE_SIZE - 1));
-	dump->u_ssize = (TASK_SIZE - first_stack_page) & ~(PAGE_SIZE - 1);
-	memcpy(&dump->fpu.fpstatus.fregs.regs[0], &current->thread.float_regs[0], (sizeof(unsigned long) * 32));
-	dump->fpu.fpstatus.fsr = current->thread.fsr;
-	dump->fpu.fpstatus.flags = dump->fpu.fpstatus.extra = 0;
-	dump->fpu.fpstatus.fpq_count = current->thread.fpqdepth;
-	memcpy(&dump->fpu.fpstatus.fpq[0], &current->thread.fpqueue[0],
-	       ((sizeof(unsigned long) * 2) * 16));
-	dump->sigcode = 0;
-}
-
 /*
  * fill in the fpu structure for a core dump.
  */
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
index ef647acc479e..62f6221db74f 100644
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -214,8 +214,6 @@ EXPORT_SYMBOL(kunmap_atomic);
 EXPORT_SYMBOL(svr4_setcontext);
 EXPORT_SYMBOL(svr4_getcontext);
 
-EXPORT_SYMBOL(dump_thread);
-
 /* prom symbols */
 EXPORT_SYMBOL(idprom);
 EXPORT_SYMBOL(prom_root_node);
diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c
index 92c1b36a2e16..9877f2d7672d 100644
--- a/arch/sparc64/kernel/binfmt_aout32.c
+++ b/arch/sparc64/kernel/binfmt_aout32.c
@@ -32,6 +32,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
+#include <asm/a.out-core.h>
 
 static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout32_library(struct file*);
@@ -101,7 +102,7 @@ static int aout32_core_dump(long signr, struct pt_regs *regs, struct file *file,
 	current->flags |= PF_DUMPCORE;
        	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
 	dump.signal = signr;
-	dump_thread(regs, &dump);
+	aout_dump_thread(regs, &dump);
 
 /* If the size of the dump file exceeds the rlimit, then see what would happen
    if we wrote the stack, but not the data area.  */
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index ca7cdfd55f72..6e21785bb36d 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -725,17 +725,6 @@ pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	return retval;
 }
 
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
-	/* Only should be used for SunOS and ancient a.out
-	 * SparcLinux binaries...  Not worth implementing.
-	 */
-	memset(dump, 0, sizeof(struct user));
-}
-
 typedef struct {
 	union {
 		unsigned int	pr_regs[32];
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 5311ee93ede3..19ce9dbf46c3 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -60,7 +60,6 @@ EXPORT_SYMBOL(os_accept_connection);
 EXPORT_SYMBOL(os_rcv_fd);
 EXPORT_SYMBOL(run_helper);
 EXPORT_SYMBOL(start_thread);
-EXPORT_SYMBOL(dump_thread);
 
 #ifdef CONFIG_SMP
 
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c07961bedb75..fc50d2f959d1 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -258,10 +258,6 @@ void cpu_idle(void)
 	default_idle();
 }
 
-void dump_thread(struct pt_regs *regs, struct user *u)
-{
-}
-
 int __cant_sleep(void) {
 	return in_atomic() || irqs_disabled() || in_interrupt();
 	/* Is in_interrupt() really needed? */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index dabdbeff1f77..78858068e24d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -539,55 +539,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	return err;
 }
 
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
-	u16 gs;
-
-/* changed the size calculations - should hopefully work better. lbt */
-	dump->magic = CMAGIC;
-	dump->start_code = 0;
-	dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
-	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
-	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
-	dump->u_dsize -= dump->u_tsize;
-	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;
-	dump->u_debugreg[1] = current->thread.debugreg1;
-	dump->u_debugreg[2] = current->thread.debugreg2;
-	dump->u_debugreg[3] = current->thread.debugreg3;
-	dump->u_debugreg[4] = 0;
-	dump->u_debugreg[5] = 0;
-	dump->u_debugreg[6] = current->thread.debugreg6;
-	dump->u_debugreg[7] = current->thread.debugreg7;
-
-	if (dump->start_stack < TASK_SIZE)
-		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
-
-	dump->regs.bx = regs->bx;
-	dump->regs.cx = regs->cx;
-	dump->regs.dx = regs->dx;
-	dump->regs.si = regs->si;
-	dump->regs.di = regs->di;
-	dump->regs.bp = regs->bp;
-	dump->regs.ax = regs->ax;
-	dump->regs.ds = (u16)regs->ds;
-	dump->regs.es = (u16)regs->es;
-	dump->regs.fs = (u16)regs->fs;
-	savesegment(gs,gs);
-	dump->regs.orig_ax = regs->orig_ax;
-	dump->regs.ip = regs->ip;
-	dump->regs.cs = (u16)regs->cs;
-	dump->regs.flags = regs->flags;
-	dump->regs.sp = regs->sp;
-	dump->regs.ss = (u16)regs->ss;
-
-	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
-}
-EXPORT_SYMBOL(dump_thread);
-
 #ifdef CONFIG_SECCOMP
 static void hard_disable_TSC(void)
 {
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 7c3d5f923da1..b5c3b6114add 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -61,7 +61,8 @@ config BINFMT_SHARED_FLAT
 
 config BINFMT_AOUT
 	tristate "Kernel support for a.out and ECOFF binaries"
-	depends on X86_32 || ALPHA || ARM || M68K || SPARC32
+	depends on ARCH_SUPPORTS_AOUT && \
+		(X86_32 || ALPHA || ARM || M68K || SPARC32)
 	---help---
 	  A.out (Assembler.OUTput) is a set of formats for libraries and
 	  executables used in the earliest versions of UNIX.  Linux used
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 7f65e71bf859..a1bb2244cac7 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -28,6 +28,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
+#include <asm/a.out-core.h>
 
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
@@ -118,7 +119,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
 	dump.u_ar0 = offsetof(struct user, regs);
 #endif
 	dump.signal = signr;
-	dump_thread(regs, &dump);
+	aout_dump_thread(regs, &dump);
 
 /* If the size of the dump file exceeds the rlimit, then see what would happen
    if we wrote the stack, but not the data area.  */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 111771d38e6e..a93b1170551b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -513,6 +513,7 @@ out:
 	return error;
 }
 
+#ifdef CONFIG_ARCH_SUPPORTS_AOUT
 static unsigned long load_aout_interp(struct exec *interp_ex,
 		struct file *interpreter)
 {
@@ -558,6 +559,14 @@ static unsigned long load_aout_interp(struct exec *interp_ex,
 out:
 	return elf_entry;
 }
+#else
+/* dummy extern - the function should never be called if !CONFIG_AOUT_BINFMT */
+static inline unsigned long load_aout_interp(struct exec *interp_ex,
+					     struct file *interpreter)
+{
+	return -ELIBACC;
+}
+#endif
 
 /*
  * These are the functions used to load ELF style executables and shared
@@ -565,9 +574,15 @@ out:
  */
 
 #define INTERPRETER_NONE 0
-#define INTERPRETER_AOUT 1
 #define INTERPRETER_ELF 2
 
+#ifdef CONFIG_ARCH_SUPPORTS_AOUT
+#define INTERPRETER_AOUT 1
+#define IS_AOUT_INTERP(x) ((x) == INTERPRETER_AOUT)
+#else
+#define IS_AOUT_INTERP(x) (0)
+#endif
+
 #ifndef STACK_RND_MASK
 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
 #endif
@@ -775,6 +790,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	/* Some simple consistency checks for the interpreter */
 	if (elf_interpreter) {
 		static int warn;
+#ifdef CONFIG_ARCH_SUPPORTS_AOUT
 		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
 
 		/* Now figure out which format our binary is */
@@ -782,11 +798,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
 		    (N_MAGIC(loc->interp_ex) != QMAGIC))
 			interpreter_type = INTERPRETER_ELF;
-
+#else
+		interpreter_type = INTERPRETER_ELF;
+#endif
 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 			interpreter_type &= ~INTERPRETER_ELF;
 
-		if (interpreter_type == INTERPRETER_AOUT && warn < 10) {
+		if (IS_AOUT_INTERP(interpreter_type) && warn < 10) {
 			printk(KERN_WARNING "a.out ELF interpreter %s is "
 				"deprecated and will not be supported "
 				"after Linux 2.6.25\n", elf_interpreter);
@@ -815,7 +833,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	/* OK, we are done with that, now set up the arg stuff,
 	   and then start this sucker up */
-	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
+	if (IS_AOUT_INTERP(interpreter_type) && !bprm->sh_bang) {
 		char *passed_p = passed_fileno;
 		sprintf(passed_fileno, "%d", elf_exec_fileno);
 
@@ -1004,7 +1022,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	}
 
 	if (elf_interpreter) {
-		if (interpreter_type == INTERPRETER_AOUT) {
+		if (IS_AOUT_INTERP(interpreter_type)) {
 			elf_entry = load_aout_interp(&loc->interp_ex,
 						     interpreter);
 		} else {
@@ -1045,7 +1063,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	kfree(elf_phdata);
 
-	if (interpreter_type != INTERPRETER_AOUT)
+	if (!IS_AOUT_INTERP(interpreter_type))
 		sys_close(elf_exec_fileno);
 
 	set_binfmt(&elf_format);
@@ -1061,14 +1079,14 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 	retval = create_elf_tables(bprm, &loc->elf_ex,
-			  (interpreter_type == INTERPRETER_AOUT),
+			  IS_AOUT_INTERP(interpreter_type),
 			  load_addr, interp_load_addr);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);
 		goto out;
 	}
 	/* N.B. passed_fileno might not be initialized? */
-	if (interpreter_type == INTERPRETER_AOUT)
+	if (IS_AOUT_INTERP(interpreter_type))
 		current->mm->arg_start += strlen(passed_fileno) + 1;
 	current->mm->end_code = end_code;
 	current->mm->start_code = start_code;
diff --git a/fs/exec.c b/fs/exec.c
index 927a7c5ef4af..9f9c27224d7c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1166,7 +1166,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 {
 	int try,retval;
 	struct linux_binfmt *fmt;
-#ifdef __alpha__
+#if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT)
 	/* handle /sbin/loader.. */
 	{
 	    struct exec * eh = (struct exec *) bprm->buf;
diff --git a/include/asm-alpha/a.out-core.h b/include/asm-alpha/a.out-core.h
new file mode 100644
index 000000000000..9e33e92e524c
--- /dev/null
+++ b/include/asm-alpha/a.out-core.h
@@ -0,0 +1,80 @@
+/* a.out coredump register dumper
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_A_OUT_CORE_H
+#define _ASM_A_OUT_CORE_H
+
+#ifdef __KERNEL__
+
+#include <linux/user.h>
+
+/*
+ * Fill in the user structure for an ECOFF core dump.
+ */
+static inline void aout_dump_thread(struct pt_regs *pt, struct user *dump)
+{
+	/* switch stack follows right below pt_regs: */
+	struct switch_stack * sw = ((struct switch_stack *) pt) - 1;
+
+	dump->magic = CMAGIC;
+	dump->start_code  = current->mm->start_code;
+	dump->start_data  = current->mm->start_data;
+	dump->start_stack = rdusp() & ~(PAGE_SIZE - 1);
+	dump->u_tsize = ((current->mm->end_code - dump->start_code)
+			 >> PAGE_SHIFT);
+	dump->u_dsize = ((current->mm->brk + PAGE_SIZE-1 - dump->start_data)
+			 >> PAGE_SHIFT);
+	dump->u_ssize = (current->mm->start_stack - dump->start_stack
+			 + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+	/*
+	 * We store the registers in an order/format that is
+	 * compatible with DEC Unix/OSF/1 as this makes life easier
+	 * for gdb.
+	 */
+	dump->regs[EF_V0]  = pt->r0;
+	dump->regs[EF_T0]  = pt->r1;
+	dump->regs[EF_T1]  = pt->r2;
+	dump->regs[EF_T2]  = pt->r3;
+	dump->regs[EF_T3]  = pt->r4;
+	dump->regs[EF_T4]  = pt->r5;
+	dump->regs[EF_T5]  = pt->r6;
+	dump->regs[EF_T6]  = pt->r7;
+	dump->regs[EF_T7]  = pt->r8;
+	dump->regs[EF_S0]  = sw->r9;
+	dump->regs[EF_S1]  = sw->r10;
+	dump->regs[EF_S2]  = sw->r11;
+	dump->regs[EF_S3]  = sw->r12;
+	dump->regs[EF_S4]  = sw->r13;
+	dump->regs[EF_S5]  = sw->r14;
+	dump->regs[EF_S6]  = sw->r15;
+	dump->regs[EF_A3]  = pt->r19;
+	dump->regs[EF_A4]  = pt->r20;
+	dump->regs[EF_A5]  = pt->r21;
+	dump->regs[EF_T8]  = pt->r22;
+	dump->regs[EF_T9]  = pt->r23;
+	dump->regs[EF_T10] = pt->r24;
+	dump->regs[EF_T11] = pt->r25;
+	dump->regs[EF_RA]  = pt->r26;
+	dump->regs[EF_T12] = pt->r27;
+	dump->regs[EF_AT]  = pt->r28;
+	dump->regs[EF_SP]  = rdusp();
+	dump->regs[EF_PS]  = pt->ps;
+	dump->regs[EF_PC]  = pt->pc;
+	dump->regs[EF_GP]  = pt->gp;
+	dump->regs[EF_A0]  = pt->r16;
+	dump->regs[EF_A1]  = pt->r17;
+	dump->regs[EF_A2]  = pt->r18;
+	memcpy((char *)dump->regs + EF_SIZE, sw->fp, 32 * 8);
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_A_OUT_CORE_H */
diff --git a/include/asm-arm/a.out-core.h b/include/asm-arm/a.out-core.h
new file mode 100644
index 000000000000..93d04acaa31f
--- /dev/null
+++ b/include/asm-arm/a.out-core.h
@@ -0,0 +1,49 @@
+/* a.out coredump register dumper
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_A_OUT_CORE_H
+#define _ASM_A_OUT_CORE_H
+
+#ifdef __KERNEL__
+
+#include <linux/user.h>
+#include <linux/elfcore.h>
+
+/*
+ * fill in the user structure for an a.out core dump
+ */
+static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
+{
+	struct task_struct *tsk = current;
+
+	dump->magic = CMAGIC;
+	dump->start_code = tsk->mm->start_code;
+	dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1);
+
+	dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT;
+	dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	dump->u_ssize = 0;
+
+	dump->u_debugreg[0] = tsk->thread.debug.bp[0].address;
+	dump->u_debugreg[1] = tsk->thread.debug.bp[1].address;
+	dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn.arm;
+	dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn.arm;
+	dump->u_debugreg[4] = tsk->thread.debug.nsaved;
+
+	if (dump->start_stack < 0x04000000)
+		dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
+
+	dump->regs = *regs;
+	dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_A_OUT_CORE_H */
diff --git a/include/asm-frv/a.out.h b/include/asm-frv/a.out.h
deleted file mode 100644
index dd3b7e5754c9..000000000000
--- a/include/asm-frv/a.out.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/*
- * FRV doesn't do AOUT format. This header file should be removed as
- * soon as fs/exec.c and fs/proc/kcore.c and the archs that require
- * them to include linux/a.out.h are fixed.
- */
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 57ba60635959..fd9dcfd91c39 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -1,4 +1,6 @@
+ifeq ($(wildcard include/asm-$(SRCARCH)/a.out.h),include/asm-$(SRCARCH)/a.out.h)
 unifdef-y += a.out.h
+endif
 unifdef-y += auxvec.h
 unifdef-y += byteorder.h
 unifdef-y += errno.h
diff --git a/include/asm-m68k/a.out-core.h b/include/asm-m68k/a.out-core.h
new file mode 100644
index 000000000000..f6bfc1d63ff6
--- /dev/null
+++ b/include/asm-m68k/a.out-core.h
@@ -0,0 +1,67 @@
+/* a.out coredump register dumper
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_A_OUT_CORE_H
+#define _ASM_A_OUT_CORE_H
+
+#ifdef __KERNEL__
+
+#include <linux/user.h>
+#include <linux/elfcore.h>
+
+/*
+ * fill in the user structure for an a.out core dump
+ */
+static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
+{
+	struct switch_stack *sw;
+
+/* changed the size calculations - should hopefully work better. lbt */
+	dump->magic = CMAGIC;
+	dump->start_code = 0;
+	dump->start_stack = rdusp() & ~(PAGE_SIZE - 1);
+	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+	dump->u_dsize = ((unsigned long) (current->mm->brk +
+					  (PAGE_SIZE-1))) >> PAGE_SHIFT;
+	dump->u_dsize -= dump->u_tsize;
+	dump->u_ssize = 0;
+
+	if (dump->start_stack < TASK_SIZE)
+		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+	dump->u_ar0 = offsetof(struct user, regs);
+	sw = ((struct switch_stack *)regs) - 1;
+	dump->regs.d1 = regs->d1;
+	dump->regs.d2 = regs->d2;
+	dump->regs.d3 = regs->d3;
+	dump->regs.d4 = regs->d4;
+	dump->regs.d5 = regs->d5;
+	dump->regs.d6 = sw->d6;
+	dump->regs.d7 = sw->d7;
+	dump->regs.a0 = regs->a0;
+	dump->regs.a1 = regs->a1;
+	dump->regs.a2 = regs->a2;
+	dump->regs.a3 = sw->a3;
+	dump->regs.a4 = sw->a4;
+	dump->regs.a5 = sw->a5;
+	dump->regs.a6 = sw->a6;
+	dump->regs.d0 = regs->d0;
+	dump->regs.orig_d0 = regs->orig_d0;
+	dump->regs.stkadj = regs->stkadj;
+	dump->regs.sr = regs->sr;
+	dump->regs.pc = regs->pc;
+	dump->regs.fmtvec = (regs->format << 12) | regs->vector;
+	/* dump floating point stuff */
+	dump->u_fpvalid = dump_fpu (regs, &dump->m68kfp);
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_A_OUT_CORE_H */
diff --git a/include/asm-sparc/a.out-core.h b/include/asm-sparc/a.out-core.h
new file mode 100644
index 000000000000..e8fd338ed0b2
--- /dev/null
+++ b/include/asm-sparc/a.out-core.h
@@ -0,0 +1,52 @@
+/* a.out coredump register dumper
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_A_OUT_CORE_H
+#define _ASM_A_OUT_CORE_H
+
+#ifdef __KERNEL__
+
+#include <linux/user.h>
+
+/*
+ * fill in the user structure for an a.out core dump
+ */
+static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
+{
+	unsigned long first_stack_page;
+
+	dump->magic = SUNOS_CORE_MAGIC;
+	dump->len = sizeof(struct user);
+	dump->regs.psr = regs->psr;
+	dump->regs.pc = regs->pc;
+	dump->regs.npc = regs->npc;
+	dump->regs.y = regs->y;
+	/* fuck me plenty */
+	memcpy(&dump->regs.regs[0], &regs->u_regs[1], (sizeof(unsigned long) * 15));
+	dump->uexec = current->thread.core_exec;
+	dump->u_tsize = (((unsigned long) current->mm->end_code) -
+		((unsigned long) current->mm->start_code)) & ~(PAGE_SIZE - 1);
+	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1)));
+	dump->u_dsize -= dump->u_tsize;
+	dump->u_dsize &= ~(PAGE_SIZE - 1);
+	first_stack_page = (regs->u_regs[UREG_FP] & ~(PAGE_SIZE - 1));
+	dump->u_ssize = (TASK_SIZE - first_stack_page) & ~(PAGE_SIZE - 1);
+	memcpy(&dump->fpu.fpstatus.fregs.regs[0], &current->thread.float_regs[0], (sizeof(unsigned long) * 32));
+	dump->fpu.fpstatus.fsr = current->thread.fsr;
+	dump->fpu.fpstatus.flags = dump->fpu.fpstatus.extra = 0;
+	dump->fpu.fpstatus.fpq_count = current->thread.fpqdepth;
+	memcpy(&dump->fpu.fpstatus.fpq[0], &current->thread.fpqueue[0],
+	       ((sizeof(unsigned long) * 2) * 16));
+	dump->sigcode = 0;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_A_OUT_CORE_H */
diff --git a/include/asm-sparc64/a.out-core.h b/include/asm-sparc64/a.out-core.h
new file mode 100644
index 000000000000..3499b3c425ca
--- /dev/null
+++ b/include/asm-sparc64/a.out-core.h
@@ -0,0 +1,31 @@
+/* a.out coredump register dumper
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_A_OUT_CORE_H
+#define _ASM_A_OUT_CORE_H
+
+#ifdef __KERNEL__
+
+#include <linux/user.h>
+
+/*
+ * fill in the user structure for an a.out core dump
+ */
+static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
+{
+	/* Only should be used for SunOS and ancient a.out
+	 * SparcLinux binaries...  Not worth implementing.
+	 */
+	memset(dump, 0, sizeof(struct user));
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_A_OUT_CORE_H */
diff --git a/include/asm-um/a.out-core.h b/include/asm-um/a.out-core.h
new file mode 100644
index 000000000000..995643b18309
--- /dev/null
+++ b/include/asm-um/a.out-core.h
@@ -0,0 +1,27 @@
+/* a.out coredump register dumper
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef __UM_A_OUT_CORE_H
+#define __UM_A_OUT_CORE_H
+
+#ifdef __KERNEL__
+
+#include <linux/user.h>
+
+/*
+ * fill in the user structure for an a.out core dump
+ */
+static inline void aout_dump_thread(struct pt_regs *regs, struct user *u)
+{
+}
+
+#endif /* __KERNEL__ */
+#endif /* __UM_A_OUT_CORE_H */
diff --git a/include/asm-x86/a.out-core.h b/include/asm-x86/a.out-core.h
new file mode 100644
index 000000000000..d2b6e11d3e97
--- /dev/null
+++ b/include/asm-x86/a.out-core.h
@@ -0,0 +1,71 @@
+/* a.out coredump register dumper
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_A_OUT_CORE_H
+#define _ASM_A_OUT_CORE_H
+
+#ifdef __KERNEL__
+#ifdef CONFIG_X86_32
+
+#include <linux/user.h>
+#include <linux/elfcore.h>
+
+/*
+ * fill in the user structure for an a.out core dump
+ */
+static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
+{
+	u16 gs;
+
+/* changed the size calculations - should hopefully work better. lbt */
+	dump->magic = CMAGIC;
+	dump->start_code = 0;
+	dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
+	dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+	dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+	dump->u_dsize -= dump->u_tsize;
+	dump->u_ssize = 0;
+	dump->u_debugreg[0] = current->thread.debugreg0;
+	dump->u_debugreg[1] = current->thread.debugreg1;
+	dump->u_debugreg[2] = current->thread.debugreg2;
+	dump->u_debugreg[3] = current->thread.debugreg3;
+	dump->u_debugreg[4] = 0;
+	dump->u_debugreg[5] = 0;
+	dump->u_debugreg[6] = current->thread.debugreg6;
+	dump->u_debugreg[7] = current->thread.debugreg7;
+
+	if (dump->start_stack < TASK_SIZE)
+		dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+	dump->regs.bx = regs->bx;
+	dump->regs.cx = regs->cx;
+	dump->regs.dx = regs->dx;
+	dump->regs.si = regs->si;
+	dump->regs.di = regs->di;
+	dump->regs.bp = regs->bp;
+	dump->regs.ax = regs->ax;
+	dump->regs.ds = (u16)regs->ds;
+	dump->regs.es = (u16)regs->es;
+	dump->regs.fs = (u16)regs->fs;
+	savesegment(gs,gs);
+	dump->regs.orig_ax = regs->orig_ax;
+	dump->regs.ip = regs->ip;
+	dump->regs.cs = (u16)regs->cs;
+	dump->regs.flags = regs->flags;
+	dump->regs.sp = regs->sp;
+	dump->regs.ss = (u16)regs->ss;
+
+	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+}
+
+#endif /* CONFIG_X86_32 */
+#endif /* __KERNEL__ */
+#endif /* _ASM_A_OUT_CORE_H */
diff --git a/include/linux/a.out.h b/include/linux/a.out.h
index 82cd918f2ab7..208f4e8ed304 100644
--- a/include/linux/a.out.h
+++ b/include/linux/a.out.h
@@ -1,6 +1,8 @@
 #ifndef __A_OUT_GNU_H__
 #define __A_OUT_GNU_H__
 
+#ifdef CONFIG_ARCH_SUPPORTS_AOUT
+
 #define __GNU_EXEC_MACROS__
 
 #ifndef __STRUCT_EXEC_OVERRIDE__
@@ -9,6 +11,8 @@
 
 #endif /* __STRUCT_EXEC_OVERRIDE__ */
 
+#ifndef __ASSEMBLY__
+
 /* these go in the N_MACHTYPE field */
 enum machine_type {
 #if defined (M_OLDSUN2)
@@ -272,5 +276,11 @@ struct relocation_info
 };
 #endif /* no N_RELOCATION_INFO_DECLARED.  */
 
-
+#endif /*__ASSEMBLY__ */
+#else /* CONFIG_ARCH_SUPPORTS_AOUT */
+#ifndef __ASSEMBLY__
+struct exec {
+};
+#endif
+#endif /* CONFIG_ARCH_SUPPORTS_AOUT */
 #endif /* __A_OUT_GNU_H__ */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 9e01f376840a..568042290c0b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -172,8 +172,6 @@ extern int kernel_text_address(unsigned long addr);
 struct pid;
 extern struct pid *session_of_pgrp(struct pid *pgrp);
 
-extern void dump_thread(struct pt_regs *regs, struct user *dump);
-
 #ifdef CONFIG_PRINTK
 asmlinkage int vprintk(const char *fmt, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
-- 
cgit 


From ef3d534754f31fed9c3b976fee1ece1b3bc38282 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 Feb 2008 04:19:30 -0800
Subject: mn10300: allocate serial port UART IDs for on-chip serial ports

Allocate serial port UART type IDs for the MN10300 on-chip serial ports.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial_core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 9963f81fea9a..1a0b6cf83ff1 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -150,6 +150,10 @@
 #define PORT_MCF	78
 
 
+/* MN10300 on-chip UART numbers */
+#define PORT_MN10300		80
+#define PORT_MN10300_CTS	81
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit 


From b920de1b77b72ca9432ac3f97edb26541e65e5dd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 Feb 2008 04:19:31 -0800
Subject: mn10300: add the MN10300/AM33 architecture to the kernel

Add architecture support for the MN10300/AM33 CPUs produced by MEI to the
kernel.

This patch also adds board support for the ASB2303 with the ASB2308 daughter
board, and the ASB2305.  The only processor supported is the MN103E010, which
is an AM33v2 core plus on-chip devices.

[akpm@linux-foundation.org: nuke cvs control strings]
Signed-off-by: Masakazu Urade <urade.masakazu@jp.panasonic.com>
Signed-off-by: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/mn10300/ABI.txt                  |  149 ++
 Documentation/mn10300/compartmentalisation.txt |   60 +
 MAINTAINERS                                    |    9 +
 arch/mn10300/Kconfig                           |  381 +++++
 arch/mn10300/Kconfig.debug                     |  135 ++
 arch/mn10300/Makefile                          |  135 ++
 arch/mn10300/boot/.gitignore                   |    1 +
 arch/mn10300/boot/Makefile                     |   28 +
 arch/mn10300/boot/compressed/Makefile          |   22 +
 arch/mn10300/boot/compressed/head.S            |   86 ++
 arch/mn10300/boot/compressed/misc.c            |  429 ++++++
 arch/mn10300/boot/compressed/misc.h            |   18 +
 arch/mn10300/boot/compressed/vmlinux.lds       |    9 +
 arch/mn10300/boot/install.sh                   |   67 +
 arch/mn10300/boot/tools/build.c                |  190 +++
 arch/mn10300/configs/asb2303_defconfig         |  555 +++++++
 arch/mn10300/kernel/Makefile                   |   27 +
 arch/mn10300/kernel/asm-offsets.c              |  108 ++
 arch/mn10300/kernel/entry.S                    |  721 +++++++++
 arch/mn10300/kernel/fpu-low.S                  |  197 +++
 arch/mn10300/kernel/fpu.c                      |  223 +++
 arch/mn10300/kernel/gdb-cache.S                |  105 ++
 arch/mn10300/kernel/gdb-io-serial-low.S        |   90 ++
 arch/mn10300/kernel/gdb-io-serial.c            |  155 ++
 arch/mn10300/kernel/gdb-io-ttysm-low.S         |   93 ++
 arch/mn10300/kernel/gdb-io-ttysm.c             |  299 ++++
 arch/mn10300/kernel/gdb-low.S                  |  115 ++
 arch/mn10300/kernel/gdb-stub.c                 | 1947 ++++++++++++++++++++++++
 arch/mn10300/kernel/head.S                     |  255 ++++
 arch/mn10300/kernel/init_task.c                |   45 +
 arch/mn10300/kernel/internal.h                 |   20 +
 arch/mn10300/kernel/io.c                       |   30 +
 arch/mn10300/kernel/irq.c                      |  235 +++
 arch/mn10300/kernel/kernel_execve.S            |   37 +
 arch/mn10300/kernel/kprobes.c                  |  653 ++++++++
 arch/mn10300/kernel/kthread.S                  |   31 +
 arch/mn10300/kernel/mn10300-debug.c            |   58 +
 arch/mn10300/kernel/mn10300-serial-low.S       |  191 +++
 arch/mn10300/kernel/mn10300-serial.c           | 1480 ++++++++++++++++++
 arch/mn10300/kernel/mn10300-serial.h           |  126 ++
 arch/mn10300/kernel/mn10300-watchdog-low.S     |   59 +
 arch/mn10300/kernel/mn10300-watchdog.c         |  183 +++
 arch/mn10300/kernel/mn10300_ksyms.c            |   37 +
 arch/mn10300/kernel/module.c                   |  206 +++
 arch/mn10300/kernel/process.c                  |  297 ++++
 arch/mn10300/kernel/profile-low.S              |   72 +
 arch/mn10300/kernel/profile.c                  |   51 +
 arch/mn10300/kernel/ptrace.c                   |  379 +++++
 arch/mn10300/kernel/rtc.c                      |  173 +++
 arch/mn10300/kernel/semaphore.c                |  149 ++
 arch/mn10300/kernel/setup.c                    |  298 ++++
 arch/mn10300/kernel/sigframe.h                 |   33 +
 arch/mn10300/kernel/signal.c                   |  564 +++++++
 arch/mn10300/kernel/switch_to.S                |   71 +
 arch/mn10300/kernel/sys_mn10300.c              |  193 +++
 arch/mn10300/kernel/time.c                     |  129 ++
 arch/mn10300/kernel/traps.c                    |  619 ++++++++
 arch/mn10300/kernel/vmlinux.lds.S              |  159 ++
 arch/mn10300/lib/Makefile                      |    7 +
 arch/mn10300/lib/__ashldi3.S                   |   51 +
 arch/mn10300/lib/__ashrdi3.S                   |   52 +
 arch/mn10300/lib/__lshrdi3.S                   |   52 +
 arch/mn10300/lib/ashrdi3.c                     |   61 +
 arch/mn10300/lib/bitops.c                      |   51 +
 arch/mn10300/lib/checksum.c                    |   99 ++
 arch/mn10300/lib/delay.c                       |   50 +
 arch/mn10300/lib/do_csum.S                     |  162 ++
 arch/mn10300/lib/internal.h                    |   15 +
 arch/mn10300/lib/lshrdi3.c                     |   60 +
 arch/mn10300/lib/memcpy.S                      |  135 ++
 arch/mn10300/lib/memmove.S                     |  160 ++
 arch/mn10300/lib/memset.S                      |  121 ++
 arch/mn10300/lib/negdi2.c                      |   57 +
 arch/mn10300/lib/usercopy.c                    |  166 ++
 arch/mn10300/mm/Makefile                       |   14 +
 arch/mn10300/mm/cache-flush-mn10300.S          |  192 +++
 arch/mn10300/mm/cache-mn10300.S                |  289 ++++
 arch/mn10300/mm/cache.c                        |  121 ++
 arch/mn10300/mm/dma-alloc.c                    |   56 +
 arch/mn10300/mm/extable.c                      |   26 +
 arch/mn10300/mm/fault.c                        |  405 +++++
 arch/mn10300/mm/init.c                         |  160 ++
 arch/mn10300/mm/misalignment.c                 |  661 ++++++++
 arch/mn10300/mm/mmu-context.c                  |   80 +
 arch/mn10300/mm/pgtable.c                      |  197 +++
 arch/mn10300/mm/tlb-mn10300.S                  |  207 +++
 arch/mn10300/oprofile/Kconfig                  |   23 +
 arch/mn10300/oprofile/Makefile                 |   13 +
 arch/mn10300/oprofile/op_model_null.c          |   22 +
 arch/mn10300/proc-mn103e010/Makefile           |    5 +
 arch/mn10300/proc-mn103e010/proc-init.c        |   75 +
 arch/mn10300/unit-asb2303/Makefile             |    6 +
 arch/mn10300/unit-asb2303/leds.c               |   52 +
 arch/mn10300/unit-asb2303/smc91111.c           |   52 +
 arch/mn10300/unit-asb2303/unit-init.c          |   60 +
 arch/mn10300/unit-asb2305/Makefile             |    8 +
 arch/mn10300/unit-asb2305/leds.c               |  124 ++
 arch/mn10300/unit-asb2305/pci-asb2305.c        |  303 ++++
 arch/mn10300/unit-asb2305/pci-asb2305.h        |   82 +
 arch/mn10300/unit-asb2305/pci-iomap.c          |   31 +
 arch/mn10300/unit-asb2305/pci-irq.c            |   51 +
 arch/mn10300/unit-asb2305/pci.c                |  545 +++++++
 arch/mn10300/unit-asb2305/unit-init.c          |   61 +
 drivers/input/joystick/analog.c                |    6 +-
 drivers/net/Kconfig                            |    3 +-
 drivers/net/smc91x.h                           |   12 +
 drivers/parport/Kconfig                        |    3 +-
 drivers/pci/Makefile                           |    1 +
 drivers/video/console/Kconfig                  |    2 +-
 include/asm-mn10300/.gitignore                 |    2 +
 include/asm-mn10300/Kbuild                     |    5 +
 include/asm-mn10300/atomic.h                   |  166 ++
 include/asm-mn10300/auxvec.h                   |    4 +
 include/asm-mn10300/bitops.h                   |  229 +++
 include/asm-mn10300/bug.h                      |   35 +
 include/asm-mn10300/bugs.h                     |   20 +
 include/asm-mn10300/busctl-regs.h              |  151 ++
 include/asm-mn10300/byteorder.h                |   46 +
 include/asm-mn10300/cache.h                    |   54 +
 include/asm-mn10300/cacheflush.h               |  116 ++
 include/asm-mn10300/checksum.h                 |   86 ++
 include/asm-mn10300/cpu-regs.h                 |  290 ++++
 include/asm-mn10300/cputime.h                  |    1 +
 include/asm-mn10300/current.h                  |   37 +
 include/asm-mn10300/delay.h                    |   19 +
 include/asm-mn10300/device.h                   |    1 +
 include/asm-mn10300/div64.h                    |  103 ++
 include/asm-mn10300/dma-mapping.h              |  234 +++
 include/asm-mn10300/dma.h                      |  118 ++
 include/asm-mn10300/dmactl-regs.h              |  101 ++
 include/asm-mn10300/elf.h                      |  147 ++
 include/asm-mn10300/emergency-restart.h        |    1 +
 include/asm-mn10300/errno.h                    |    1 +
 include/asm-mn10300/exceptions.h               |  121 ++
 include/asm-mn10300/fb.h                       |   23 +
 include/asm-mn10300/fcntl.h                    |    1 +
 include/asm-mn10300/fpu.h                      |   85 ++
 include/asm-mn10300/frame.inc                  |   91 ++
 include/asm-mn10300/futex.h                    |    1 +
 include/asm-mn10300/gdb-stub.h                 |  183 +++
 include/asm-mn10300/hardirq.h                  |   48 +
 include/asm-mn10300/highmem.h                  |  114 ++
 include/asm-mn10300/hw_irq.h                   |   14 +
 include/asm-mn10300/ide.h                      |   43 +
 include/asm-mn10300/intctl-regs.h              |   73 +
 include/asm-mn10300/io.h                       |  299 ++++
 include/asm-mn10300/ioctl.h                    |    1 +
 include/asm-mn10300/ioctls.h                   |   88 ++
 include/asm-mn10300/ipc.h                      |    1 +
 include/asm-mn10300/ipcbuf.h                   |   29 +
 include/asm-mn10300/irq.h                      |   32 +
 include/asm-mn10300/irq_regs.h                 |   24 +
 include/asm-mn10300/kdebug.h                   |   22 +
 include/asm-mn10300/kmap_types.h               |   31 +
 include/asm-mn10300/kprobes.h                  |   50 +
 include/asm-mn10300/linkage.h                  |   22 +
 include/asm-mn10300/local.h                    |    1 +
 include/asm-mn10300/mc146818rtc.h              |    1 +
 include/asm-mn10300/mman.h                     |   28 +
 include/asm-mn10300/mmu.h                      |   19 +
 include/asm-mn10300/mmu_context.h              |  138 ++
 include/asm-mn10300/module.h                   |   27 +
 include/asm-mn10300/msgbuf.h                   |   31 +
 include/asm-mn10300/mutex.h                    |   16 +
 include/asm-mn10300/namei.h                    |   22 +
 include/asm-mn10300/nmi.h                      |   14 +
 include/asm-mn10300/page.h                     |  131 ++
 include/asm-mn10300/page_offset.h              |   11 +
 include/asm-mn10300/param.h                    |   34 +
 include/asm-mn10300/pci.h                      |  133 ++
 include/asm-mn10300/percpu.h                   |    1 +
 include/asm-mn10300/pgalloc.h                  |   56 +
 include/asm-mn10300/pgtable.h                  |  489 ++++++
 include/asm-mn10300/pio-regs.h                 |  233 +++
 include/asm-mn10300/poll.h                     |    1 +
 include/asm-mn10300/posix_types.h              |  132 ++
 include/asm-mn10300/proc-mn103e010/cache.h     |   33 +
 include/asm-mn10300/proc-mn103e010/clock.h     |   18 +
 include/asm-mn10300/proc-mn103e010/irq.h       |   34 +
 include/asm-mn10300/proc-mn103e010/proc.h      |   18 +
 include/asm-mn10300/processor.h                |  186 +++
 include/asm-mn10300/ptrace.h                   |   99 ++
 include/asm-mn10300/reset-regs.h               |   64 +
 include/asm-mn10300/resource.h                 |    1 +
 include/asm-mn10300/rtc-regs.h                 |   86 ++
 include/asm-mn10300/rtc.h                      |   41 +
 include/asm-mn10300/scatterlist.h              |   46 +
 include/asm-mn10300/sections.h                 |    1 +
 include/asm-mn10300/semaphore.h                |  169 ++
 include/asm-mn10300/sembuf.h                   |   25 +
 include/asm-mn10300/serial-regs.h              |  160 ++
 include/asm-mn10300/serial.h                   |   36 +
 include/asm-mn10300/setup.h                    |   17 +
 include/asm-mn10300/shmbuf.h                   |   42 +
 include/asm-mn10300/shmparam.h                 |    6 +
 include/asm-mn10300/sigcontext.h               |   52 +
 include/asm-mn10300/siginfo.h                  |    1 +
 include/asm-mn10300/signal.h                   |  171 +++
 include/asm-mn10300/smp.h                      |   18 +
 include/asm-mn10300/socket.h                   |   55 +
 include/asm-mn10300/sockios.h                  |   13 +
 include/asm-mn10300/spinlock.h                 |   16 +
 include/asm-mn10300/stat.h                     |   78 +
 include/asm-mn10300/statfs.h                   |    1 +
 include/asm-mn10300/string.h                   |   32 +
 include/asm-mn10300/system.h                   |  237 +++
 include/asm-mn10300/termbits.h                 |  200 +++
 include/asm-mn10300/termios.h                  |   92 ++
 include/asm-mn10300/thread_info.h              |  168 ++
 include/asm-mn10300/timer-regs.h               |  293 ++++
 include/asm-mn10300/timex.h                    |   33 +
 include/asm-mn10300/tlb.h                      |   34 +
 include/asm-mn10300/tlbflush.h                 |   80 +
 include/asm-mn10300/topology.h                 |    1 +
 include/asm-mn10300/types.h                    |   67 +
 include/asm-mn10300/uaccess.h                  |  490 ++++++
 include/asm-mn10300/ucontext.h                 |   22 +
 include/asm-mn10300/unaligned.h                |  136 ++
 include/asm-mn10300/unistd.h                   |  384 +++++
 include/asm-mn10300/unit-asb2303/clock.h       |   45 +
 include/asm-mn10300/unit-asb2303/leds.h        |   43 +
 include/asm-mn10300/unit-asb2303/serial.h      |  136 ++
 include/asm-mn10300/unit-asb2303/smc91111.h    |   50 +
 include/asm-mn10300/unit-asb2303/timex.h       |  135 ++
 include/asm-mn10300/unit-asb2305/clock.h       |   45 +
 include/asm-mn10300/unit-asb2305/leds.h        |   51 +
 include/asm-mn10300/unit-asb2305/serial.h      |  120 ++
 include/asm-mn10300/unit-asb2305/timex.h       |  135 ++
 include/asm-mn10300/user.h                     |   53 +
 include/asm-mn10300/vga.h                      |   17 +
 include/asm-mn10300/xor.h                      |    1 +
 include/linux/elf-em.h                         |    3 +
 include/linux/kernel.h                         |    1 +
 lib/Kconfig.debug                              |    7 +-
 234 files changed, 27906 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/mn10300/ABI.txt
 create mode 100644 Documentation/mn10300/compartmentalisation.txt
 create mode 100644 arch/mn10300/Kconfig
 create mode 100644 arch/mn10300/Kconfig.debug
 create mode 100644 arch/mn10300/Makefile
 create mode 100644 arch/mn10300/boot/.gitignore
 create mode 100644 arch/mn10300/boot/Makefile
 create mode 100644 arch/mn10300/boot/compressed/Makefile
 create mode 100644 arch/mn10300/boot/compressed/head.S
 create mode 100644 arch/mn10300/boot/compressed/misc.c
 create mode 100644 arch/mn10300/boot/compressed/misc.h
 create mode 100644 arch/mn10300/boot/compressed/vmlinux.lds
 create mode 100644 arch/mn10300/boot/install.sh
 create mode 100644 arch/mn10300/boot/tools/build.c
 create mode 100644 arch/mn10300/configs/asb2303_defconfig
 create mode 100644 arch/mn10300/kernel/Makefile
 create mode 100644 arch/mn10300/kernel/asm-offsets.c
 create mode 100644 arch/mn10300/kernel/entry.S
 create mode 100644 arch/mn10300/kernel/fpu-low.S
 create mode 100644 arch/mn10300/kernel/fpu.c
 create mode 100644 arch/mn10300/kernel/gdb-cache.S
 create mode 100644 arch/mn10300/kernel/gdb-io-serial-low.S
 create mode 100644 arch/mn10300/kernel/gdb-io-serial.c
 create mode 100644 arch/mn10300/kernel/gdb-io-ttysm-low.S
 create mode 100644 arch/mn10300/kernel/gdb-io-ttysm.c
 create mode 100644 arch/mn10300/kernel/gdb-low.S
 create mode 100644 arch/mn10300/kernel/gdb-stub.c
 create mode 100644 arch/mn10300/kernel/head.S
 create mode 100644 arch/mn10300/kernel/init_task.c
 create mode 100644 arch/mn10300/kernel/internal.h
 create mode 100644 arch/mn10300/kernel/io.c
 create mode 100644 arch/mn10300/kernel/irq.c
 create mode 100644 arch/mn10300/kernel/kernel_execve.S
 create mode 100644 arch/mn10300/kernel/kprobes.c
 create mode 100644 arch/mn10300/kernel/kthread.S
 create mode 100644 arch/mn10300/kernel/mn10300-debug.c
 create mode 100644 arch/mn10300/kernel/mn10300-serial-low.S
 create mode 100644 arch/mn10300/kernel/mn10300-serial.c
 create mode 100644 arch/mn10300/kernel/mn10300-serial.h
 create mode 100644 arch/mn10300/kernel/mn10300-watchdog-low.S
 create mode 100644 arch/mn10300/kernel/mn10300-watchdog.c
 create mode 100644 arch/mn10300/kernel/mn10300_ksyms.c
 create mode 100644 arch/mn10300/kernel/module.c
 create mode 100644 arch/mn10300/kernel/process.c
 create mode 100644 arch/mn10300/kernel/profile-low.S
 create mode 100644 arch/mn10300/kernel/profile.c
 create mode 100644 arch/mn10300/kernel/ptrace.c
 create mode 100644 arch/mn10300/kernel/rtc.c
 create mode 100644 arch/mn10300/kernel/semaphore.c
 create mode 100644 arch/mn10300/kernel/setup.c
 create mode 100644 arch/mn10300/kernel/sigframe.h
 create mode 100644 arch/mn10300/kernel/signal.c
 create mode 100644 arch/mn10300/kernel/switch_to.S
 create mode 100644 arch/mn10300/kernel/sys_mn10300.c
 create mode 100644 arch/mn10300/kernel/time.c
 create mode 100644 arch/mn10300/kernel/traps.c
 create mode 100644 arch/mn10300/kernel/vmlinux.lds.S
 create mode 100644 arch/mn10300/lib/Makefile
 create mode 100644 arch/mn10300/lib/__ashldi3.S
 create mode 100644 arch/mn10300/lib/__ashrdi3.S
 create mode 100644 arch/mn10300/lib/__lshrdi3.S
 create mode 100644 arch/mn10300/lib/ashrdi3.c
 create mode 100644 arch/mn10300/lib/bitops.c
 create mode 100644 arch/mn10300/lib/checksum.c
 create mode 100644 arch/mn10300/lib/delay.c
 create mode 100644 arch/mn10300/lib/do_csum.S
 create mode 100644 arch/mn10300/lib/internal.h
 create mode 100644 arch/mn10300/lib/lshrdi3.c
 create mode 100644 arch/mn10300/lib/memcpy.S
 create mode 100644 arch/mn10300/lib/memmove.S
 create mode 100644 arch/mn10300/lib/memset.S
 create mode 100644 arch/mn10300/lib/negdi2.c
 create mode 100644 arch/mn10300/lib/usercopy.c
 create mode 100644 arch/mn10300/mm/Makefile
 create mode 100644 arch/mn10300/mm/cache-flush-mn10300.S
 create mode 100644 arch/mn10300/mm/cache-mn10300.S
 create mode 100644 arch/mn10300/mm/cache.c
 create mode 100644 arch/mn10300/mm/dma-alloc.c
 create mode 100644 arch/mn10300/mm/extable.c
 create mode 100644 arch/mn10300/mm/fault.c
 create mode 100644 arch/mn10300/mm/init.c
 create mode 100644 arch/mn10300/mm/misalignment.c
 create mode 100644 arch/mn10300/mm/mmu-context.c
 create mode 100644 arch/mn10300/mm/pgtable.c
 create mode 100644 arch/mn10300/mm/tlb-mn10300.S
 create mode 100644 arch/mn10300/oprofile/Kconfig
 create mode 100644 arch/mn10300/oprofile/Makefile
 create mode 100644 arch/mn10300/oprofile/op_model_null.c
 create mode 100644 arch/mn10300/proc-mn103e010/Makefile
 create mode 100644 arch/mn10300/proc-mn103e010/proc-init.c
 create mode 100644 arch/mn10300/unit-asb2303/Makefile
 create mode 100644 arch/mn10300/unit-asb2303/leds.c
 create mode 100644 arch/mn10300/unit-asb2303/smc91111.c
 create mode 100644 arch/mn10300/unit-asb2303/unit-init.c
 create mode 100644 arch/mn10300/unit-asb2305/Makefile
 create mode 100644 arch/mn10300/unit-asb2305/leds.c
 create mode 100644 arch/mn10300/unit-asb2305/pci-asb2305.c
 create mode 100644 arch/mn10300/unit-asb2305/pci-asb2305.h
 create mode 100644 arch/mn10300/unit-asb2305/pci-iomap.c
 create mode 100644 arch/mn10300/unit-asb2305/pci-irq.c
 create mode 100644 arch/mn10300/unit-asb2305/pci.c
 create mode 100644 arch/mn10300/unit-asb2305/unit-init.c
 create mode 100644 include/asm-mn10300/.gitignore
 create mode 100644 include/asm-mn10300/Kbuild
 create mode 100644 include/asm-mn10300/atomic.h
 create mode 100644 include/asm-mn10300/auxvec.h
 create mode 100644 include/asm-mn10300/bitops.h
 create mode 100644 include/asm-mn10300/bug.h
 create mode 100644 include/asm-mn10300/bugs.h
 create mode 100644 include/asm-mn10300/busctl-regs.h
 create mode 100644 include/asm-mn10300/byteorder.h
 create mode 100644 include/asm-mn10300/cache.h
 create mode 100644 include/asm-mn10300/cacheflush.h
 create mode 100644 include/asm-mn10300/checksum.h
 create mode 100644 include/asm-mn10300/cpu-regs.h
 create mode 100644 include/asm-mn10300/cputime.h
 create mode 100644 include/asm-mn10300/current.h
 create mode 100644 include/asm-mn10300/delay.h
 create mode 100644 include/asm-mn10300/device.h
 create mode 100644 include/asm-mn10300/div64.h
 create mode 100644 include/asm-mn10300/dma-mapping.h
 create mode 100644 include/asm-mn10300/dma.h
 create mode 100644 include/asm-mn10300/dmactl-regs.h
 create mode 100644 include/asm-mn10300/elf.h
 create mode 100644 include/asm-mn10300/emergency-restart.h
 create mode 100644 include/asm-mn10300/errno.h
 create mode 100644 include/asm-mn10300/exceptions.h
 create mode 100644 include/asm-mn10300/fb.h
 create mode 100644 include/asm-mn10300/fcntl.h
 create mode 100644 include/asm-mn10300/fpu.h
 create mode 100644 include/asm-mn10300/frame.inc
 create mode 100644 include/asm-mn10300/futex.h
 create mode 100644 include/asm-mn10300/gdb-stub.h
 create mode 100644 include/asm-mn10300/hardirq.h
 create mode 100644 include/asm-mn10300/highmem.h
 create mode 100644 include/asm-mn10300/hw_irq.h
 create mode 100644 include/asm-mn10300/ide.h
 create mode 100644 include/asm-mn10300/intctl-regs.h
 create mode 100644 include/asm-mn10300/io.h
 create mode 100644 include/asm-mn10300/ioctl.h
 create mode 100644 include/asm-mn10300/ioctls.h
 create mode 100644 include/asm-mn10300/ipc.h
 create mode 100644 include/asm-mn10300/ipcbuf.h
 create mode 100644 include/asm-mn10300/irq.h
 create mode 100644 include/asm-mn10300/irq_regs.h
 create mode 100644 include/asm-mn10300/kdebug.h
 create mode 100644 include/asm-mn10300/kmap_types.h
 create mode 100644 include/asm-mn10300/kprobes.h
 create mode 100644 include/asm-mn10300/linkage.h
 create mode 100644 include/asm-mn10300/local.h
 create mode 100644 include/asm-mn10300/mc146818rtc.h
 create mode 100644 include/asm-mn10300/mman.h
 create mode 100644 include/asm-mn10300/mmu.h
 create mode 100644 include/asm-mn10300/mmu_context.h
 create mode 100644 include/asm-mn10300/module.h
 create mode 100644 include/asm-mn10300/msgbuf.h
 create mode 100644 include/asm-mn10300/mutex.h
 create mode 100644 include/asm-mn10300/namei.h
 create mode 100644 include/asm-mn10300/nmi.h
 create mode 100644 include/asm-mn10300/page.h
 create mode 100644 include/asm-mn10300/page_offset.h
 create mode 100644 include/asm-mn10300/param.h
 create mode 100644 include/asm-mn10300/pci.h
 create mode 100644 include/asm-mn10300/percpu.h
 create mode 100644 include/asm-mn10300/pgalloc.h
 create mode 100644 include/asm-mn10300/pgtable.h
 create mode 100644 include/asm-mn10300/pio-regs.h
 create mode 100644 include/asm-mn10300/poll.h
 create mode 100644 include/asm-mn10300/posix_types.h
 create mode 100644 include/asm-mn10300/proc-mn103e010/cache.h
 create mode 100644 include/asm-mn10300/proc-mn103e010/clock.h
 create mode 100644 include/asm-mn10300/proc-mn103e010/irq.h
 create mode 100644 include/asm-mn10300/proc-mn103e010/proc.h
 create mode 100644 include/asm-mn10300/processor.h
 create mode 100644 include/asm-mn10300/ptrace.h
 create mode 100644 include/asm-mn10300/reset-regs.h
 create mode 100644 include/asm-mn10300/resource.h
 create mode 100644 include/asm-mn10300/rtc-regs.h
 create mode 100644 include/asm-mn10300/rtc.h
 create mode 100644 include/asm-mn10300/scatterlist.h
 create mode 100644 include/asm-mn10300/sections.h
 create mode 100644 include/asm-mn10300/semaphore.h
 create mode 100644 include/asm-mn10300/sembuf.h
 create mode 100644 include/asm-mn10300/serial-regs.h
 create mode 100644 include/asm-mn10300/serial.h
 create mode 100644 include/asm-mn10300/setup.h
 create mode 100644 include/asm-mn10300/shmbuf.h
 create mode 100644 include/asm-mn10300/shmparam.h
 create mode 100644 include/asm-mn10300/sigcontext.h
 create mode 100644 include/asm-mn10300/siginfo.h
 create mode 100644 include/asm-mn10300/signal.h
 create mode 100644 include/asm-mn10300/smp.h
 create mode 100644 include/asm-mn10300/socket.h
 create mode 100644 include/asm-mn10300/sockios.h
 create mode 100644 include/asm-mn10300/spinlock.h
 create mode 100644 include/asm-mn10300/stat.h
 create mode 100644 include/asm-mn10300/statfs.h
 create mode 100644 include/asm-mn10300/string.h
 create mode 100644 include/asm-mn10300/system.h
 create mode 100644 include/asm-mn10300/termbits.h
 create mode 100644 include/asm-mn10300/termios.h
 create mode 100644 include/asm-mn10300/thread_info.h
 create mode 100644 include/asm-mn10300/timer-regs.h
 create mode 100644 include/asm-mn10300/timex.h
 create mode 100644 include/asm-mn10300/tlb.h
 create mode 100644 include/asm-mn10300/tlbflush.h
 create mode 100644 include/asm-mn10300/topology.h
 create mode 100644 include/asm-mn10300/types.h
 create mode 100644 include/asm-mn10300/uaccess.h
 create mode 100644 include/asm-mn10300/ucontext.h
 create mode 100644 include/asm-mn10300/unaligned.h
 create mode 100644 include/asm-mn10300/unistd.h
 create mode 100644 include/asm-mn10300/unit-asb2303/clock.h
 create mode 100644 include/asm-mn10300/unit-asb2303/leds.h
 create mode 100644 include/asm-mn10300/unit-asb2303/serial.h
 create mode 100644 include/asm-mn10300/unit-asb2303/smc91111.h
 create mode 100644 include/asm-mn10300/unit-asb2303/timex.h
 create mode 100644 include/asm-mn10300/unit-asb2305/clock.h
 create mode 100644 include/asm-mn10300/unit-asb2305/leds.h
 create mode 100644 include/asm-mn10300/unit-asb2305/serial.h
 create mode 100644 include/asm-mn10300/unit-asb2305/timex.h
 create mode 100644 include/asm-mn10300/user.h
 create mode 100644 include/asm-mn10300/vga.h
 create mode 100644 include/asm-mn10300/xor.h

(limited to 'include/linux')

diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt
new file mode 100644
index 000000000000..1fef1f06dfd2
--- /dev/null
+++ b/Documentation/mn10300/ABI.txt
@@ -0,0 +1,149 @@
+			   =========================
+			   MN10300 FUNCTION CALL ABI
+			   =========================
+
+=======
+GENERAL
+=======
+
+The MN10300/AM33 kernel runs in little-endian mode; big-endian mode is not
+supported.
+
+The stack grows downwards, and should always be 32-bit aligned. There are
+separate stack pointer registers for userspace and the kernel.
+
+
+================
+ARGUMENT PASSING
+================
+
+The first two arguments (assuming up to 32-bits per argument) to a function are
+passed in the D0 and D1 registers respectively; all other arguments are passed
+on the stack.
+
+If 64-bit arguments are being passed, then they are never split between
+registers and the stack. If the first argument is a 64-bit value, it will be
+passed in D0:D1. If the first argument is not a 64-bit value, but the second
+is, the second will be passed entirely on the stack and D1 will be unused.
+
+Arguments smaller than 32-bits are not coelesced within a register or a stack
+word. For example, two byte-sized arguments will always be passed in separate
+registers or word-sized stack slots.
+
+
+=================
+CALLING FUNCTIONS
+=================
+
+The caller must allocate twelve bytes on the stack for the callee's use before
+it inserts a CALL instruction. The CALL instruction will write into the TOS
+word, but won't actually modify the stack pointer; similarly, the RET
+instruction reads from the TOS word of the stack, but doesn't move the stack
+pointer beyond it.
+
+
+	Stack:
+	|		|
+	|		|
+	|---------------| SP+20
+	| 4th Arg	|
+	|---------------| SP+16
+	| 3rd Arg	|
+	|---------------| SP+12
+	| D1 Save Slot	|
+	|---------------| SP+8
+	| D0 Save Slot	|
+	|---------------| SP+4
+	| Return Addr	|
+	|---------------| SP
+	|		|
+	|		|
+
+
+The caller must leave space on the stack (hence an allocation of twelve bytes)
+in which the callee may store the first two arguments.
+
+
+============
+RETURN VALUE
+============
+
+The return value is passed in D0 for an integer (or D0:D1 for a 64-bit value),
+or A0 for a pointer.
+
+If the return value is a value larger than 64-bits, or is a structure or an
+array, then a hidden first argument will be passed to the callee by the caller:
+this will point to a piece of memory large enough to hold the result of the
+function. In this case, the callee will return the value in that piece of
+memory, and no value will be returned in D0 or A0.
+
+
+===================
+REGISTER CLOBBERING
+===================
+
+The values in certain registers may be clobbered by the callee, and other
+values must be saved:
+
+	Clobber:	D0-D1, A0-A1, E0-E3
+	Save:		D2-D3, A2-A3, E4-E7, SP
+
+All other non-supervisor-only registers are clobberable (such as MDR, MCRL,
+MCRH).
+
+
+=================
+SPECIAL REGISTERS
+=================
+
+Certain ordinary registers may carry special usage for the compiler:
+
+	A3:	Frame pointer
+	E2:	TLS pointer
+
+
+==========
+KERNEL ABI
+==========
+
+The kernel may use a slightly different ABI internally.
+
+ (*) E2
+
+     If CONFIG_MN10300_CURRENT_IN_E2 is defined, then the current task pointer
+     will be kept in the E2 register, and that register will be marked
+     unavailable for the compiler to use as a scratch register.
+
+     Normally the kernel uses something like:
+
+	MOV	SP,An
+	AND	0xFFFFE000,An
+	MOV	(An),Rm		// Rm holds current
+	MOV	(yyy,Rm)	// Access current->yyy
+
+     To find the address of current; but since this option permits current to
+     be carried globally in an register, it can use:
+
+	MOV	(yyy,E2)	// Access current->yyy
+
+     instead.
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+System calls are called with the following convention:
+
+	REGISTER	ENTRY			EXIT
+	===============	=======================	=======================
+	D0		Syscall number		Return value
+	A0		1st syscall argument	Saved
+	D1		2nd syscall argument	Saved
+	A3		3rd syscall argument	Saved
+	A2		4th syscall argument	Saved
+	D3		5th syscall argument	Saved
+	D2		6th syscall argument	Saved
+
+All other registers are saved.  The layout is a consequence of the way the MOVM
+instruction stores registers onto the stack.
diff --git a/Documentation/mn10300/compartmentalisation.txt b/Documentation/mn10300/compartmentalisation.txt
new file mode 100644
index 000000000000..8958b51dac4b
--- /dev/null
+++ b/Documentation/mn10300/compartmentalisation.txt
@@ -0,0 +1,60 @@
+		   =========================================
+		   PART-SPECIFIC SOURCE COMPARTMENTALISATION
+		   =========================================
+
+The sources for various parts are compartmentalised at two different levels:
+
+ (1) Processor level
+
+     The "processor level" is a CPU core plus the other on-silicon
+     peripherals.
+
+     Processor-specific header files are divided among directories in a similar
+     way to the CPU level:
+
+	(*) include/asm-mn10300/proc-mn103e010/
+
+	    Support for the AM33v2 CPU core.
+
+     The appropriate processor is selected by a CONFIG_MN10300_PROC_YYYY option
+     from the "Processor support" choice menu in the arch/mn10300/Kconfig file.
+
+
+ (2) Unit level
+
+     The "unit level" is a processor plus all the external peripherals
+     controlled by that processor.
+
+     Unit-specific header files are divided among directories in a similar way
+     to the CPU level; not only that, but specific sources may also be
+     segregated into separate directories under the arch directory:
+
+	(*) include/asm-mn10300/unit-asb2303/
+	(*) arch/mn10300/unit-asb2303/
+
+	    Support for the ASB2303 board with an ASB2308 daughter board.
+
+	(*) include/asm-mn10300/unit-asb2305/
+	(*) arch/mn10300/unit-asb2305/
+
+	    Support for the ASB2305 board.
+
+     The appropriate processor is selected by a CONFIG_MN10300_UNIT_ZZZZ option
+     from the "Unit type" choice menu in the arch/mn10300/Kconfig file.
+
+
+============
+COMPILE TIME
+============
+
+When the kernel is compiled, symbolic links will be made in the asm header file
+directory for this arch:
+
+	include/asm-mn10300/proc => include/asm-mn10300/proc-YYYY/
+	include/asm-mn10300/unit => include/asm-mn10300/unit-ZZZZ/
+
+So that the header files contained in those directories can be accessed without
+lots of #ifdef-age.
+
+The appropriate arch/mn10300/unit-ZZZZ directory will also be entered by the
+compilation process; all other unit-specific directories will be ignored.
diff --git a/MAINTAINERS b/MAINTAINERS
index 2cdb591ac080..a7e6ef3dae16 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2614,6 +2614,15 @@ L:	linux-kernel@vger.kernel.org
 W:	http://www.linux-mm.org
 S:	Maintained
 
+MEI MN10300/AM33 PORT
+P:	David Howells
+M:	dhowells@redhat.com
+P:	Koichi Yasutake
+M:	yasutake.koichi@jp.panasonic.com
+L:	linux-am33-list@redhat.com
+W:	ftp://ftp.redhat.com/pub/redhat/gnupro/AM33/
+S:	Maintained
+
 MEMORY TECHNOLOGY DEVICES (MTD)
 P:	David Woodhouse
 M:	dwmw2@infradead.org
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
new file mode 100644
index 000000000000..eedc3a5e0d9b
--- /dev/null
+++ b/arch/mn10300/Kconfig
@@ -0,0 +1,381 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "Linux Kernel Configuration"
+
+config MN10300
+	def_bool y
+
+config AM33
+	def_bool y
+
+config MMU
+	def_bool y
+
+config HIGHMEM
+	def_bool n
+
+config NUMA
+	def_bool n
+
+config UID16
+	def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+	def_bool y
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+
+config GENERIC_HARDIRQS_NO__DO_IRQ
+	def_bool y
+
+config GENERIC_CALIBRATE_DELAY
+	def_bool y
+
+config GENERIC_FIND_NEXT_BIT
+	def_bool y
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_TIME
+	def_bool y
+
+config GENERIC_BUG
+	def_bool y
+
+config QUICKLIST
+	def_bool y
+
+config ARCH_HAS_ILOG2_U32
+	def_bool y
+
+config ARCH_SUPPORTS_AOUT
+	def_bool n
+
+# Use the generic interrupt handling code in kernel/irq/
+config GENERIC_HARDIRQS
+	def_bool y
+
+config HOTPLUG_CPU
+	def_bool n
+
+mainmenu "Matsushita MN10300/AM33 Kernel Configuration"
+
+source "init/Kconfig"
+
+
+menu "Matsushita MN10300 system setup"
+
+choice
+	prompt "Unit type"
+	default MN10300_UNIT_ASB2303
+	help
+	  This option specifies board for which the kernel will be
+	  compiled. It affects the external peripherals catered for.
+
+config MN10300_UNIT_ASB2303
+	bool "ASB2303"
+
+config MN10300_UNIT_ASB2305
+	bool "ASB2305"
+
+endchoice
+
+choice
+	prompt "Processor support"
+	default MN10300_PROC_MN103E010
+	help
+	  This option specifies the processor for which the kernel will be
+	  compiled. It affects the on-chip peripherals catered for.
+
+config MN10300_PROC_MN103E010
+	bool "MN103E010"
+	depends on MN10300_UNIT_ASB2303 || MN10300_UNIT_ASB2305
+	select MN10300_PROC_HAS_TTYSM0
+	select MN10300_PROC_HAS_TTYSM1
+	select MN10300_PROC_HAS_TTYSM2
+
+endchoice
+
+choice
+	prompt "Processor core support"
+	default MN10300_CPU_AM33V2
+	help
+	  This option specifies the processor core for which the kernel will be
+	  compiled. It affects the instruction set used.
+
+config MN10300_CPU_AM33V2
+	bool "AM33v2"
+
+endchoice
+
+config FPU
+	bool "FPU present"
+	default y
+	depends on MN10300_PROC_MN103E010
+
+choice
+	prompt "CPU Caching mode"
+	default MN10300_CACHE_WBACK
+	help
+	  This option determines the caching mode for the kernel.
+
+	  Write-Back caching mode involves the all reads and writes causing
+	  the affected cacheline to be read into the cache first before being
+	  operated upon. Memory is not then updated by a write until the cache
+	  is filled and a cacheline needs to be displaced from the cache to
+	  make room. Only at that point is it written back.
+
+	  Write-Through caching only fetches cachelines from memory on a
+	  read. Writes always get written directly to memory. If the affected
+	  cacheline is also in cache, it will be updated too.
+
+	  The final option is to turn of caching entirely.
+
+config MN10300_CACHE_WBACK
+	bool "Write-Back"
+
+config MN10300_CACHE_WTHRU
+	bool "Write-Through"
+
+config MN10300_CACHE_DISABLED
+	bool "Disabled"
+
+endchoice
+
+menu "Memory layout options"
+
+config KERNEL_RAM_BASE_ADDRESS
+	hex "Base address of kernel RAM"
+	default "0x90000000"
+
+config INTERRUPT_VECTOR_BASE
+	hex "Base address of vector table"
+	default "0x90000000"
+	help
+	  The base address of the vector table will be programmed into
+          the TBR register. It must be on 16MiB address boundary.
+
+config KERNEL_TEXT_ADDRESS
+	hex "Base address of kernel"
+	default "0x90001000"
+
+config KERNEL_ZIMAGE_BASE_ADDRESS
+	hex "Base address of compressed vmlinux image"
+	default "0x90700000"
+
+endmenu
+
+config PREEMPT
+	bool "Preemptible Kernel"
+	help
+	  This option reduces the latency of the kernel when reacting to
+	  real-time or interactive events by allowing a low priority process to
+	  be preempted even if it is in kernel mode executing a system call.
+	  This allows applications to run more reliably even when the system is
+	  under load.
+
+	  Say Y here if you are building a kernel for a desktop, embedded
+	  or real-time system.  Say N if you are unsure.
+
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
+config MN10300_CURRENT_IN_E2
+	bool "Hold current task address in E2 register"
+	default y
+	help
+	  This option removes the E2/R2 register from the set available to gcc
+	  for normal use and instead uses it to store the address of the
+	  current process's task_struct whilst in the kernel.
+
+	  This means the kernel doesn't need to calculate the address each time
+	  "current" is used (take SP, AND with mask and dereference pointer
+	  just to get the address), and instead can just use E2+offset
+	  addressing each time.
+
+	  This has no effect on userspace.
+
+config MN10300_USING_JTAG
+	bool "Using JTAG to debug kernel"
+	default y
+	help
+	  This options indicates that JTAG will be used to debug the kernel. It
+	  suppresses the use of certain hardware debugging features, such as
+	  single-stepping, which are taken over completely by the JTAG unit.
+
+config MN10300_RTC
+	bool "Using MN10300 RTC"
+	depends on MN10300_PROC_MN103E010
+	default n
+	help
+
+	  This option enables support for the RTC, thus enabling time to be
+	  tracked, even when system is powered down. This is available on-chip
+	  on the MN103E010.
+
+config MN10300_WD_TIMER
+	bool "Using MN10300 watchdog timer"
+	default y
+	help
+	  This options indicates that the watchdog timer will be used.
+
+config PCI
+	bool "Use PCI"
+	depends on MN10300_UNIT_ASB2305
+	default y
+	help
+	  Some systems (such as the ASB2305) have PCI onboard. If you have one
+	  of these boards and you wish to use the PCI facilities, say Y here.
+
+	  The PCI-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>, contains valuable
+	  information about which PCI hardware does work under Linux and which
+	  doesn't.
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+menu "MN10300 internal serial options"
+
+config MN10300_PROC_HAS_TTYSM0
+	bool
+	default n
+
+config MN10300_PROC_HAS_TTYSM1
+	bool
+	default n
+
+config MN10300_PROC_HAS_TTYSM2
+	bool
+	default n
+
+config MN10300_TTYSM
+	bool "Support for ttySM serial ports"
+	depends on MN10300
+	default y
+	select SERIAL_CORE
+	help
+	  This option enables support for the on-chip serial ports that the
+	  MN10300 has available.
+
+config MN10300_TTYSM_CONSOLE
+	bool "Support for console on ttySM serial ports"
+	depends on MN10300_TTYSM
+	select SERIAL_CORE_CONSOLE
+	help
+	  This option enables support for a console on the on-chip serial ports
+	  that the MN10300 has available.
+
+#
+# /dev/ttySM0
+#
+config MN10300_TTYSM0
+	bool "Enable SIF0 (/dev/ttySM0)"
+	depends on MN10300_TTYSM && MN10300_PROC_HAS_TTYSM0
+	help
+	  Enable access to SIF0 through /dev/ttySM0 or gdb-stub
+
+choice
+	prompt "Select the timer to supply the clock for SIF0"
+	default MN10300_TTYSM0_TIMER8
+	depends on MN10300_TTYSM0
+
+config MN10300_TTYSM0_TIMER8
+	bool "Use timer 8 (16-bit)"
+
+config MN10300_TTYSM0_TIMER2
+	bool "Use timer 2 (8-bit)"
+
+endchoice
+
+#
+# /dev/ttySM1
+#
+config MN10300_TTYSM1
+	bool "Enable SIF1 (/dev/ttySM1)"
+	depends on MN10300_TTYSM && MN10300_PROC_HAS_TTYSM1
+	help
+	  Enable access to SIF1 through /dev/ttySM1 or gdb-stub
+
+choice
+	prompt "Select the timer to supply the clock for SIF1"
+	default MN10300_TTYSM0_TIMER9
+	depends on MN10300_TTYSM1
+
+config MN10300_TTYSM1_TIMER9
+	bool "Use timer 9 (16-bit)"
+
+config MN10300_TTYSM1_TIMER3
+	bool "Use timer 3 (8-bit)"
+
+endchoice
+
+#
+# /dev/ttySM2
+#
+config MN10300_TTYSM2
+	bool "Enable SIF2 (/dev/ttySM2)"
+	depends on MN10300_TTYSM && MN10300_PROC_HAS_TTYSM2
+	help
+	  Enable access to SIF2 through /dev/ttySM2 or gdb-stub
+
+choice
+	prompt "Select the timer to supply the clock for SIF2"
+	default MN10300_TTYSM0_TIMER10
+	depends on MN10300_TTYSM2
+
+config MN10300_TTYSM2_TIMER10
+	bool "Use timer 10 (16-bit)"
+
+endchoice
+
+config MN10300_TTYSM2_CTS
+	bool "Enable the use of the CTS line /dev/ttySM2"
+	depends on MN10300_TTYSM2
+
+endmenu
+
+source "mm/Kconfig"
+
+menu "Power management options"
+source kernel/power/Kconfig
+endmenu
+
+endmenu
+
+
+menu "Executable formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/mn10300/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+source "arch/mn10300/oprofile/Kconfig"
diff --git a/arch/mn10300/Kconfig.debug b/arch/mn10300/Kconfig.debug
new file mode 100644
index 000000000000..524e33819f32
--- /dev/null
+++ b/arch/mn10300/Kconfig.debug
@@ -0,0 +1,135 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config DEBUG_STACKOVERFLOW
+	bool "Check for stack overflows"
+	depends on DEBUG_KERNEL
+
+config DEBUG_DECOMPRESS_KERNEL
+	bool "Using serial port during decompressing kernel"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  If you say Y here you will confirm the start and the end of
+	  decompressing Linux seeing "Uncompressing Linux... " and
+	  "Ok, booting the kernel.\n" on console.
+
+config KPROBES
+	bool "Kprobes"
+	depends on DEBUG_KERNEL
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+
+config GDBSTUB
+	bool "Remote GDB kernel debugging"
+	depends on DEBUG_KERNEL
+	select DEBUG_INFO
+	select FRAME_POINTER
+	help
+	  If you say Y here, it will be possible to remotely debug the kernel
+	  using gdb. This enlarges your kernel ELF image disk size by several
+	  megabytes and requires a machine with more than 16 MB, better 32 MB
+	  RAM to avoid excessive linking time. This is only useful for kernel
+	  hackers. If unsure, say N.
+
+config GDBSTUB_IMMEDIATE
+	bool "Break into GDB stub immediately"
+	depends on GDBSTUB
+	help
+	  If you say Y here, GDB stub will break into the program as soon as
+	  possible, leaving the program counter at the beginning of
+	  start_kernel() in init/main.c.
+
+config GDB_CONSOLE
+	bool "Console output to GDB"
+	depends on GDBSTUB
+	help
+	  If you are using GDB for remote debugging over a serial port and
+	  would like kernel messages to be formatted into GDB $O packets so
+	  that GDB prints them as program output, say 'Y'.
+
+config GDBSTUB_DEBUGGING
+	bool "Debug GDB stub by messages to serial port"
+	depends on GDBSTUB
+	help
+	  This causes debugging messages to be displayed at various points
+	  during execution of the GDB stub routines. Such messages will be
+	  displayed on ttyS0 if that isn't the GDB stub's port, or ttySM0
+	  otherwise.
+
+config GDBSTUB_DEBUG_ENTRY
+	bool "Debug GDB stub entry"
+	depends on GDBSTUB_DEBUGGING
+	help
+	  This option causes information to be displayed about entry to or exit
+	  from the main GDB stub routine.
+
+config GDBSTUB_DEBUG_PROTOCOL
+	bool "Debug GDB stub protocol"
+	depends on GDBSTUB_DEBUGGING
+	help
+	  This option causes information to be displayed about the GDB remote
+	  protocol messages generated exchanged with GDB.
+
+config GDBSTUB_DEBUG_IO
+	bool "Debug GDB stub I/O"
+	depends on GDBSTUB_DEBUGGING
+	help
+	  This option causes information to be displayed about GDB stub's
+	  low-level I/O.
+
+config GDBSTUB_DEBUG_BREAKPOINT
+	bool "Debug GDB stub breakpoint management"
+	depends on GDBSTUB_DEBUGGING
+	help
+	  This option causes information to be displayed about GDB stub's
+	  breakpoint management.
+
+choice
+	prompt "GDB stub port"
+	default GDBSTUB_TTYSM0
+	depends on GDBSTUB
+	help
+	  Select the serial port used for GDB-stub.
+
+config GDBSTUB_ON_TTYSM0
+	bool "/dev/ttySM0 [SIF0]"
+	depends on MN10300_TTYSM0
+	select GDBSTUB_ON_TTYSMx
+
+config GDBSTUB_ON_TTYSM1
+	bool "/dev/ttySM1 [SIF1]"
+	depends on MN10300_TTYSM1
+	select GDBSTUB_ON_TTYSMx
+
+config GDBSTUB_ON_TTYSM2
+	bool "/dev/ttySM2 [SIF2]"
+	depends on MN10300_TTYSM2
+	select GDBSTUB_ON_TTYSMx
+
+config GDBSTUB_ON_TTYS0
+	bool "/dev/ttyS0"
+	select GDBSTUB_ON_TTYSx
+
+config GDBSTUB_ON_TTYS1
+	bool "/dev/ttyS1"
+	select GDBSTUB_ON_TTYSx
+
+endchoice
+
+config GDBSTUB_ON_TTYSMx
+	bool
+	depends on GDBSTUB_ON_TTYSM0 || GDBSTUB_ON_TTYSM1 || GDBSTUB_ON_TTYSM2
+	default y
+
+config GDBSTUB_ON_TTYSx
+	bool
+	depends on GDBSTUB_ON_TTYS0 || GDBSTUB_ON_TTYS1
+	default y
+
+endmenu
diff --git a/arch/mn10300/Makefile b/arch/mn10300/Makefile
new file mode 100644
index 000000000000..6673a28ec07a
--- /dev/null
+++ b/arch/mn10300/Makefile
@@ -0,0 +1,135 @@
+###############################################################################
+#
+# MN10300 Kernel makefile system specifications
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Modified by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+
+KBUILD_DEFCONFIG := asb2303_defconfig
+
+CCSPECS	:= $(shell $(CC) -v 2>&1 | grep "^Reading specs from " | head -1 | cut -c20-)
+CCDIR	:= $(strip $(patsubst %/specs,%,$(CCSPECS)))
+KBUILD_CPPFLAGS += -nostdinc -I$(CCDIR)/include
+
+LDFLAGS		:=
+OBJCOPYFLAGS	:= -O binary -R .note -R .comment -S
+#LDFLAGS_vmlinux := -Map linkmap.txt
+CHECKFLAGS	+=
+
+PROCESSOR	:= unset
+UNIT		:= unset
+
+KBUILD_CFLAGS	+= -mam33 -mmem-funcs -DCPU=AM33
+KBUILD_AFLAGS	+= -mam33 -DCPU=AM33
+
+ifeq ($(CONFIG_MN10300_CURRENT_IN_E2),y)
+KBUILD_CFLAGS	+= -ffixed-e2 -fcall-saved-e5
+endif
+
+ifeq ($(CONFIG_MN10300_PROC_MN103E010),y)
+PROCESSOR	:= mn103e010
+endif
+
+ifeq ($(CONFIG_MN10300_UNIT_ASB2303),y)
+UNIT		:= asb2303
+endif
+ifeq ($(CONFIG_MN10300_UNIT_ASB2305),y)
+UNIT		:= asb2305
+endif
+
+
+head-y		:= arch/mn10300/kernel/head.o arch/mn10300/kernel/init_task.o
+
+core-y		+= arch/mn10300/kernel/ arch/mn10300/mm/
+
+ifneq ($(PROCESSOR),unset)
+core-y		+= arch/mn10300/proc-$(PROCESSOR)/
+endif
+ifneq ($(UNIT),unset)
+core-y		+= arch/mn10300/unit-$(UNIT)/
+endif
+libs-y		+= arch/mn10300/lib/
+
+drivers-$(CONFIG_OPROFILE)	+= arch/mn10300/oprofile/
+
+boot := arch/mn10300/boot
+
+.PHONY: zImage
+
+KBUILD_IMAGE := $(boot)/zImage
+CLEAN_FILES += $(boot)/zImage
+CLEAN_FILES += $(boot)/compressed/vmlinux
+CLEAN_FILES += $(boot)/compressed/vmlinux.bin
+CLEAN_FILES += $(boot)/compressed/vmlinux.bin.gz
+
+zImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+all: zImage
+
+bootstrap:
+	$(Q)$(MAKEBOOT) bootstrap
+
+archclean:
+	$(Q)$(MAKE) $(clean)=arch/mn10300/proc-mn103e010
+	$(Q)$(MAKE) $(clean)=arch/mn10300/unit-asb2303
+	$(Q)$(MAKE) $(clean)=arch/mn10300/unit-asb2305
+
+define archhelp
+  echo  '* zImage        - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
+endef
+
+# If you make sure the .S files get compiled with debug info,
+# uncomment the following to disable optimisations
+# that are unhelpful whilst debugging.
+ifdef CONFIG_DEBUG_INFO
+#KBUILD_CFLAGS	+= -O1
+KBUILD_AFLAGS	+= -Wa,--gdwarf2
+endif
+
+###################################################################################################
+#
+# juggle some symlinks in the MN10300 asm include dir
+#
+#	Update machine proc and unit symlinks if something which affects
+#	them changed.  We use .proc / .unit to indicate when they were
+#	updated last, otherwise make uses the target directory mtime.
+#
+###################################################################################################
+
+# processor specific definitions
+include/asm-mn10300/.proc: $(wildcard include/config/proc/*.h) include/config/auto.conf
+	@echo '  SYMLINK include/asm-mn10300/proc -> include/asm-mn10300/proc-$(PROCESSOR)'
+ifneq ($(KBUILD_SRC),)
+	$(Q)mkdir -p include/asm-mn10300
+	$(Q)ln -fsn $(srctree)/include/asm-mn10300/proc-$(PROCESSOR) include/asm-mn10300/proc
+else
+	$(Q)ln -fsn proc-$(PROCESSOR) include/asm-mn10300/proc
+endif
+	@touch $@
+
+CLEAN_FILES += include/asm-mn10300/proc include/asm-mn10300/.proc
+
+prepare: include/asm-mn10300/.proc
+
+# unit specific definitions
+include/asm-mn10300/.unit: $(wildcard include/config/unit/*.h) include/config/auto.conf
+	@echo '  SYMLINK include/asm-mn10300/unit -> include/asm-mn10300/unit-$(UNIT)'
+ifneq ($(KBUILD_SRC),)
+	$(Q)mkdir -p include/asm-mn10300
+	$(Q)ln -fsn $(srctree)/include/asm-mn10300/unit-$(UNIT) include/asm-mn10300/unit
+else
+	$(Q)ln -fsn unit-$(UNIT) include/asm-mn10300/unit
+endif
+	@touch $@
+
+CLEAN_FILES += include/asm-mn10300/unit include/asm-mn10300/.unit
+
+prepare: include/asm-mn10300/.unit
diff --git a/arch/mn10300/boot/.gitignore b/arch/mn10300/boot/.gitignore
new file mode 100644
index 000000000000..b6718de23693
--- /dev/null
+++ b/arch/mn10300/boot/.gitignore
@@ -0,0 +1 @@
+zImage
diff --git a/arch/mn10300/boot/Makefile b/arch/mn10300/boot/Makefile
new file mode 100644
index 000000000000..36c9caf8ea0a
--- /dev/null
+++ b/arch/mn10300/boot/Makefile
@@ -0,0 +1,28 @@
+# MN10300 kernel compressor and wrapper
+#
+# Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+
+targets		:= vmlinux.bin zImage
+
+subdir- 	:= compressed
+
+# ---------------------------------------------------------------------------
+
+
+$(obj)/zImage: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,objcopy)
+	@echo 'Kernel: $@ is ready'
+
+$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: FORCE
+	$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
diff --git a/arch/mn10300/boot/compressed/Makefile b/arch/mn10300/boot/compressed/Makefile
new file mode 100644
index 000000000000..08a95e171685
--- /dev/null
+++ b/arch/mn10300/boot/compressed/Makefile
@@ -0,0 +1,22 @@
+#
+# Create a compressed vmlinux image from the original vmlinux
+#
+
+targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
+
+LDFLAGS_vmlinux := -Ttext $(CONFIG_KERNEL_ZIMAGE_BASE_ADDRESS) -e startup_32
+
+$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+	$(call if_changed,ld)
+	@:
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+LDFLAGS_piggy.o := -r --format binary --oformat elf32-am33lin -T
+
+$(obj)/piggy.o: $(obj)/vmlinux.lds $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,ld)
diff --git a/arch/mn10300/boot/compressed/head.S b/arch/mn10300/boot/compressed/head.S
new file mode 100644
index 000000000000..502e1eb56709
--- /dev/null
+++ b/arch/mn10300/boot/compressed/head.S
@@ -0,0 +1,86 @@
+/* Boot entry point for a compressed MN10300 kernel
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+	.section	.text
+
+#define DEBUG
+
+#include <linux/linkage.h>
+#include <asm/cpu-regs.h>
+
+	.globl startup_32
+startup_32:
+	# first save off parameters from bootloader
+	mov	param_save_area,a0
+	mov	d0,(a0)
+	mov	d1,(4,a0)
+	mov	d2,(8,a0)
+
+	mov	sp,a3
+	mov	decomp_stack+0x2000-4,a0
+	mov	a0,sp
+
+	# invalidate and enable both of the caches
+	mov	CHCTR,a0
+	clr	d0
+	movhu	d0,(a0)					# turn off first
+	mov	CHCTR_ICINV|CHCTR_DCINV,d0
+	movhu	d0,(a0)
+	setlb
+	mov	(a0),d0
+	btst	CHCTR_ICBUSY|CHCTR_DCBUSY,d0		# wait till not busy
+	lne
+	mov	CHCTR_ICEN|CHCTR_DCEN|CHCTR_DCWTMD,d0	# writethru dcache
+	movhu	d0,(a0)					# enable
+
+	# clear the BSS area
+	mov	__bss_start,a0
+	mov	_end,a1
+	clr	d0
+bssclear:
+	cmp	a1,a0
+	bge	bssclear_end
+	movbu	d0,(a0)
+	inc	a0
+	bra	bssclear
+bssclear_end:
+
+	# decompress the kernel
+	call	decompress_kernel[],0
+
+	# disable caches again
+	mov	CHCTR,a0
+	clr	d0
+	movhu	d0,(a0)
+	setlb
+	mov	(a0),d0
+	btst	CHCTR_ICBUSY|CHCTR_DCBUSY,d0		# wait till not busy
+	lne
+
+	mov	param_save_area,a0
+	mov	(a0),d0
+	mov	(4,a0),d1
+	mov	(8,a0),d2
+
+	mov	a3,sp
+	mov	CONFIG_KERNEL_TEXT_ADDRESS,a0
+	jmp	(a0)
+
+	.data
+	.align		4
+param_save_area:
+	.rept 3
+	.word		0
+	.endr
+
+	.section	.bss
+	.align		4
+decomp_stack:
+	.space		0x2000
diff --git a/arch/mn10300/boot/compressed/misc.c b/arch/mn10300/boot/compressed/misc.c
new file mode 100644
index 000000000000..ded207efc97a
--- /dev/null
+++ b/arch/mn10300/boot/compressed/misc.c
@@ -0,0 +1,429 @@
+/* MN10300 Miscellaneous helper routines for kernel decompressor
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ * - Derived from arch/x86/boot/compressed/misc_32.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/compiler.h>
+#include <asm/serial-regs.h>
+#include "misc.h"
+
+#ifndef CONFIG_GDBSTUB_ON_TTYSx
+/* display 'Uncompressing Linux... ' messages on ttyS0 or ttyS1 */
+#if 1	/* ttyS0 */
+#define CYG_DEV_BASE	0xA6FB0000
+#else   /* ttyS1 */
+#define CYG_DEV_BASE	0xA6FC0000
+#endif
+
+#define CYG_DEV_THR	(*((volatile __u8*)(CYG_DEV_BASE + 0x00)))
+#define CYG_DEV_MCR	(*((volatile __u8*)(CYG_DEV_BASE + 0x10)))
+#define SIO_MCR_DTR	0x01
+#define SIO_MCR_RTS	0x02
+#define CYG_DEV_LSR	(*((volatile __u8*)(CYG_DEV_BASE + 0x14)))
+#define SIO_LSR_THRE	0x20		/* transmitter holding register empty */
+#define SIO_LSR_TEMT	0x40		/* transmitter register empty */
+#define CYG_DEV_MSR	(*((volatile __u8*)(CYG_DEV_BASE + 0x18)))
+#define SIO_MSR_CTS	0x10		/* clear to send */
+#define SIO_MSR_DSR	0x20		/* data set ready */
+
+#define LSR_WAIT_FOR(STATE) \
+	do { while (!(CYG_DEV_LSR & SIO_LSR_##STATE)) {} } while (0)
+#define FLOWCTL_QUERY(LINE) \
+	({ CYG_DEV_MSR & SIO_MSR_##LINE; })
+#define FLOWCTL_WAIT_FOR(LINE) \
+	do { while (!(CYG_DEV_MSR & SIO_MSR_##LINE)) {} } while (0)
+#define FLOWCTL_CLEAR(LINE) \
+	do { CYG_DEV_MCR &= ~SIO_MCR_##LINE; } while (0)
+#define FLOWCTL_SET(LINE) \
+	do { CYG_DEV_MCR |= SIO_MCR_##LINE; } while (0)
+#endif
+
+/*
+ * gzip declarations
+ */
+
+#define OF(args)  args
+#define STATIC static
+
+#undef memset
+#undef memcpy
+
+static inline void *memset(const void *s, int c, size_t n)
+{
+	int i;
+	char *ss = (char *) s;
+
+	for (i = 0; i < n; i++)
+		ss[i] = c;
+	return (void *)s;
+}
+
+#define memzero(s, n) memset((s), 0, (n))
+
+static inline void *memcpy(void *__dest, const void *__src, size_t __n)
+{
+	int i;
+	const char *s = __src;
+	char *d = __dest;
+
+	for (i = 0; i < __n; i++)
+		d[i] = s[i];
+	return __dest;
+}
+
+typedef unsigned char  uch;
+typedef unsigned short ush;
+typedef unsigned long  ulg;
+
+#define WSIZE 0x8000	/* Window size must be at least 32k, and a power of
+			 * two */
+
+static uch *inbuf;	/* input buffer */
+static uch window[WSIZE]; /* sliding window buffer */
+
+static unsigned insize;	/* valid bytes in inbuf */
+static unsigned inptr;	/* index of next byte to be processed in inbuf */
+static unsigned outcnt;	/* bytes in output buffer */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
+#define RESERVED     0xC0 /* bit 6,7:   reserved */
+
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond, msg) { if (!(cond)) error(msg); }
+#  define Trace(x)	fprintf x
+#  define Tracev(x)	{ if (verbose) fprintf x ; }
+#  define Tracevv(x)	{ if (verbose > 1) fprintf x ; }
+#  define Tracec(c, x)	{ if (verbose && (c)) fprintf x ; }
+#  define Tracecv(c, x)	{ if (verbose > 1 && (c)) fprintf x ; }
+#else
+#  define Assert(cond, msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c, x)
+#  define Tracecv(c, x)
+#endif
+
+static int  fill_inbuf(void);
+static void flush_window(void);
+static void error(const char *) __attribute__((noreturn));
+static void kputs(const char *);
+
+static inline unsigned char get_byte(void)
+{
+	unsigned char ch = inptr < insize ? inbuf[inptr++] : fill_inbuf();
+
+#if 0
+	char hex[3];
+	hex[0] = ((ch & 0x0f) > 9) ?
+		((ch & 0x0f) + 'A' - 0xa) : ((ch & 0x0f) + '0');
+	hex[1] = ((ch >> 4) > 9) ?
+		((ch >> 4) + 'A' - 0xa) : ((ch >> 4) + '0');
+	hex[2] = 0;
+	kputs(hex);
+#endif
+	return ch;
+}
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+#define EXT_MEM_K (*(unsigned short *)0x90002)
+#ifndef STANDARD_MEMORY_BIOS_CALL
+#define ALT_MEM_K (*(unsigned long *) 0x901e0)
+#endif
+#define SCREEN_INFO (*(struct screen_info *)0x90000)
+
+static long bytes_out;
+static uch *output_data;
+static unsigned long output_ptr;
+
+
+static void *malloc(int size);
+
+static inline void free(void *where)
+{	/* Don't care */
+}
+
+static unsigned long free_mem_ptr = (unsigned long) &end;
+static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000;
+
+static inline void gzip_mark(void **ptr)
+{
+	kputs(".");
+	*ptr = (void *) free_mem_ptr;
+}
+
+static inline void gzip_release(void **ptr)
+{
+	free_mem_ptr = (unsigned long) *ptr;
+}
+
+#define INPLACE_MOVE_ROUTINE	0x1000
+#define LOW_BUFFER_START	0x2000
+#define LOW_BUFFER_END		0x90000
+#define LOW_BUFFER_SIZE		(LOW_BUFFER_END - LOW_BUFFER_START)
+#define HEAP_SIZE		0x3000
+static int high_loaded;
+static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
+
+static char *vidmem = (char *)0xb8000;
+static int lines, cols;
+
+#include "../../../../lib/inflate.c"
+
+static void *malloc(int size)
+{
+	void *p;
+
+	if (size < 0)
+		error("Malloc error\n");
+	if (!free_mem_ptr)
+		error("Memory error\n");
+
+	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
+
+	p = (void *) free_mem_ptr;
+	free_mem_ptr += size;
+
+	if (free_mem_ptr >= free_mem_end_ptr)
+		error("\nOut of memory\n");
+
+	return p;
+}
+
+static inline void scroll(void)
+{
+	int i;
+
+	memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
+	for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
+		vidmem[i] = ' ';
+}
+
+static inline void kputchar(unsigned char ch)
+{
+#ifdef CONFIG_MN10300_UNIT_ASB2305
+	while (SC0STR & SC01STR_TBF)
+		continue;
+
+	if (ch == 0x0a) {
+		SC0TXB = 0x0d;
+		while (SC0STR & SC01STR_TBF)
+			continue;
+	}
+
+	SC0TXB = ch;
+
+#else
+	while (SC1STR & SC01STR_TBF)
+		continue;
+
+	if (ch == 0x0a) {
+		SC1TXB = 0x0d;
+		while (SC1STR & SC01STR_TBF)
+			continue;
+	}
+
+	SC1TXB = ch;
+
+#endif
+}
+
+static void kputs(const char *s)
+{
+#ifdef CONFIG_DEBUG_DECOMPRESS_KERNEL
+#ifndef CONFIG_GDBSTUB_ON_TTYSx
+	char ch;
+
+	FLOWCTL_SET(DTR);
+
+	while (*s) {
+		LSR_WAIT_FOR(THRE);
+
+		ch = *s++;
+		if (ch == 0x0a) {
+			CYG_DEV_THR = 0x0d;
+			LSR_WAIT_FOR(THRE);
+		}
+		CYG_DEV_THR = ch;
+	}
+
+	FLOWCTL_CLEAR(DTR);
+#else
+
+	for (; *s; s++)
+		kputchar(*s);
+
+#endif
+#endif /* CONFIG_DEBUG_DECOMPRESS_KERNEL */
+}
+
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ */
+static int fill_inbuf()
+{
+	if (insize != 0)
+		error("ran out of input data\n");
+
+	inbuf = input_data;
+	insize = input_len;
+	inptr = 1;
+	return inbuf[0];
+}
+
+/* ===========================================================================
+ * Write the output window window[0..outcnt-1] and update crc and bytes_out.
+ * (Used for the decompressed data only.)
+ */
+static void flush_window_low(void)
+{
+    ulg c = crc;         /* temporary variable */
+    unsigned n;
+    uch *in, *out, ch;
+
+    in = window;
+    out = &output_data[output_ptr];
+    for (n = 0; n < outcnt; n++) {
+	    ch = *out++ = *in++;
+	    c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+    bytes_out += (ulg)outcnt;
+    output_ptr += (ulg)outcnt;
+    outcnt = 0;
+}
+
+static void flush_window_high(void)
+{
+    ulg c = crc;         /* temporary variable */
+    unsigned n;
+    uch *in,  ch;
+    in = window;
+    for (n = 0; n < outcnt; n++) {
+	ch = *output_data++ = *in++;
+	if ((ulg) output_data == LOW_BUFFER_END)
+		output_data = high_buffer_start;
+	c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+    bytes_out += (ulg)outcnt;
+    outcnt = 0;
+}
+
+static void flush_window(void)
+{
+	if (high_loaded)
+		flush_window_high();
+	else
+		flush_window_low();
+}
+
+static void error(const char *x)
+{
+	kputs("\n\n");
+	kputs(x);
+	kputs("\n\n -- System halted");
+
+	while (1)
+		/* Halt */;
+}
+
+#define STACK_SIZE (4096)
+
+long user_stack[STACK_SIZE];
+
+struct {
+	long *a;
+	short b;
+} stack_start = { &user_stack[STACK_SIZE], 0 };
+
+void setup_normal_output_buffer(void)
+{
+#ifdef STANDARD_MEMORY_BIOS_CALL
+	if (EXT_MEM_K < 1024)
+		error("Less than 2MB of memory.\n");
+#else
+	if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024)
+		error("Less than 2MB of memory.\n");
+#endif
+	output_data = (char *) 0x100000; /* Points to 1M */
+}
+
+struct moveparams {
+	uch *low_buffer_start;
+	int lcount;
+	uch *high_buffer_start;
+	int hcount;
+};
+
+void setup_output_buffer_if_we_run_high(struct moveparams *mv)
+{
+	high_buffer_start = (uch *)(((ulg) &end) + HEAP_SIZE);
+#ifdef STANDARD_MEMORY_BIOS_CALL
+	if (EXT_MEM_K < (3 * 1024))
+		error("Less than 4MB of memory.\n");
+#else
+	if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3 * 1024))
+		error("Less than 4MB of memory.\n");
+#endif
+	mv->low_buffer_start = output_data = (char *) LOW_BUFFER_START;
+	high_loaded = 1;
+	free_mem_end_ptr = (long) high_buffer_start;
+	if (0x100000 + LOW_BUFFER_SIZE > (ulg) high_buffer_start) {
+		high_buffer_start = (uch *)(0x100000 + LOW_BUFFER_SIZE);
+		mv->hcount = 0; /* say: we need not to move high_buffer */
+	} else {
+		mv->hcount = -1;
+	}
+	mv->high_buffer_start = high_buffer_start;
+}
+
+void close_output_buffer_if_we_run_high(struct moveparams *mv)
+{
+	mv->lcount = bytes_out;
+	if (bytes_out > LOW_BUFFER_SIZE) {
+		mv->lcount = LOW_BUFFER_SIZE;
+		if (mv->hcount)
+			mv->hcount = bytes_out - LOW_BUFFER_SIZE;
+	} else {
+		mv->hcount = 0;
+	}
+}
+
+#undef DEBUGFLAG
+#ifdef DEBUGFLAG
+int debugflag;
+#endif
+
+int decompress_kernel(struct moveparams *mv)
+{
+#ifdef DEBUGFLAG
+	while (!debugflag)
+		barrier();
+#endif
+
+	output_data = (char *) CONFIG_KERNEL_TEXT_ADDRESS;
+
+	makecrc();
+	kputs("Uncompressing Linux... ");
+	gunzip();
+	kputs("Ok, booting the kernel.\n");
+	return 0;
+}
diff --git a/arch/mn10300/boot/compressed/misc.h b/arch/mn10300/boot/compressed/misc.h
new file mode 100644
index 000000000000..da921cd172fb
--- /dev/null
+++ b/arch/mn10300/boot/compressed/misc.h
@@ -0,0 +1,18 @@
+/* Internal definitions for the MN10300 kernel decompressor
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+extern int end;
+
+/*
+ * vmlinux.lds
+ */
+extern char input_data[];
+extern int input_len;
diff --git a/arch/mn10300/boot/compressed/vmlinux.lds b/arch/mn10300/boot/compressed/vmlinux.lds
new file mode 100644
index 000000000000..a084903603fe
--- /dev/null
+++ b/arch/mn10300/boot/compressed/vmlinux.lds
@@ -0,0 +1,9 @@
+SECTIONS
+{
+  .data : {
+	input_len = .;
+	LONG(input_data_end - input_data) input_data = .;
+	*(.data)
+	input_data_end = .;
+	}
+}
diff --git a/arch/mn10300/boot/install.sh b/arch/mn10300/boot/install.sh
new file mode 100644
index 000000000000..072951c83976
--- /dev/null
+++ b/arch/mn10300/boot/install.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+#
+# arch/mn10300/boot/install -c.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# Licence.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install -c" script for i386 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install -c path (blank if root directory)
+#   $5 - boot rom file
+#
+
+# User may have a custom install -c script
+
+rm -fr $4/../usr/include/linux $4/../usr/include/asm
+install -c -m 0755 $2 $4/vmlinuz
+install -c -m 0755 $5 $4/boot.rom
+install -c -m 0755 -d $4/../usr/include/linux
+cd $TOPDIR/include/linux
+for i in `find . -maxdepth 1 -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/linux
+done
+install -c -m 0755 -d $4/../usr/include/linux/byteorder
+cd $TOPDIR/include/linux/byteorder
+for i in `find . -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/linux/byteorder
+done
+install -c -m 0755 -d $4/../usr/include/linux/lockd
+cd $TOPDIR/include/linux/lockd
+for i in `find . -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/linux/lockd
+done
+install -c -m 0755 -d $4/../usr/include/linux/netfilter_ipv4
+cd $TOPDIR/include/linux/netfilter_ipv4
+for i in `find . -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/linux/netfilter_ipv4
+done
+install -c -m 0755 -d $4/../usr/include/linux/nfsd
+cd $TOPDIR/include/linux/nfsd
+for i in `find . -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/linux/nfsd/$i
+done
+install -c -m 0755 -d $4/../usr/include/linux/raid
+cd $TOPDIR/include/linux/raid
+for i in `find . -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/linux/raid
+done
+install -c -m 0755 -d $4/../usr/include/linux/sunrpc
+cd $TOPDIR/include/linux/sunrpc
+for i in `find . -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/linux/sunrpc
+done
+install -c -m 0755 -d $4/../usr/include/asm
+cd $TOPDIR/include/asm
+for i in `find . -name '*.h' -print`; do
+  install -c -m 0644 $i $4/../usr/include/asm
+done
diff --git a/arch/mn10300/boot/tools/build.c b/arch/mn10300/boot/tools/build.c
new file mode 100644
index 000000000000..4f552ead0f8e
--- /dev/null
+++ b/arch/mn10300/boot/tools/build.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 1997 Martin Mares
+ */
+
+/*
+ * This file builds a disk-image from three different files:
+ *
+ * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest
+ * - setup: 8086 machine code, sets up system parm
+ * - system: 80386 code for actual system
+ *
+ * It does some checking that all files are of the correct type, and
+ * just writes the result to stdout, removing headers and padding to
+ * the right amount. It also writes some system data to stderr.
+ */
+
+/*
+ * Changes by tytso to allow root device specification
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ * Cross compiling fixes by Gertjan van Wingerde, July 1996
+ * Rewritten by Martin Mares, April 1997
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <asm/boot.h>
+
+#define DEFAULT_MAJOR_ROOT 0
+#define DEFAULT_MINOR_ROOT 0
+
+/* Minimal number of setup sectors (see also bootsect.S) */
+#define SETUP_SECTS 4
+
+uint8_t buf[1024];
+int fd;
+int is_big_kernel;
+
+__attribute__((noreturn))
+void die(const char *str, ...)
+{
+	va_list args;
+	va_start(args, str);
+	vfprintf(stderr, str, args);
+	fputc('\n', stderr);
+	exit(1);
+}
+
+void file_open(const char *name)
+{
+	fd = open(name, O_RDONLY, 0);
+	if (fd < 0)
+		die("Unable to open `%s': %m", name);
+}
+
+__attribute__((noreturn))
+void usage(void)
+{
+	die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int i, c, sz, setup_sectors;
+	uint32_t sys_size;
+	uint8_t major_root, minor_root;
+	struct stat sb;
+
+	if (argc > 2 && !strcmp(argv[1], "-b")) {
+		is_big_kernel = 1;
+		argc--, argv++;
+	}
+	if ((argc < 4) || (argc > 5))
+		usage();
+	if (argc > 4) {
+		if (!strcmp(argv[4], "CURRENT")) {
+			if (stat("/", &sb)) {
+				perror("/");
+				die("Couldn't stat /");
+			}
+			major_root = major(sb.st_dev);
+			minor_root = minor(sb.st_dev);
+		} else if (strcmp(argv[4], "FLOPPY")) {
+			if (stat(argv[4], &sb)) {
+				perror(argv[4]);
+				die("Couldn't stat root device.");
+			}
+			major_root = major(sb.st_rdev);
+			minor_root = minor(sb.st_rdev);
+		} else {
+			major_root = 0;
+			minor_root = 0;
+		}
+	} else {
+		major_root = DEFAULT_MAJOR_ROOT;
+		minor_root = DEFAULT_MINOR_ROOT;
+	}
+	fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
+
+	file_open(argv[1]);
+	i = read(fd, buf, sizeof(buf));
+	fprintf(stderr, "Boot sector %d bytes.\n", i);
+	if (i != 512)
+		die("Boot block must be exactly 512 bytes");
+	if (buf[510] != 0x55 || buf[511] != 0xaa)
+		die("Boot block hasn't got boot flag (0xAA55)");
+	buf[508] = minor_root;
+	buf[509] = major_root;
+	if (write(1, buf, 512) != 512)
+		die("Write call failed");
+	close(fd);
+
+	/* Copy the setup code */
+	file_open(argv[2]);
+	for (i = 0; (c = read(fd, buf, sizeof(buf))) > 0; i += c)
+		if (write(1, buf, c) != c)
+			die("Write call failed");
+	if (c != 0)
+		die("read-error on `setup'");
+	close(fd);
+
+	/* Pad unused space with zeros */
+	setup_sectors = (i + 511) / 512;
+	/* for compatibility with ancient versions of LILO. */
+	if (setup_sectors < SETUP_SECTS)
+		setup_sectors = SETUP_SECTS;
+	fprintf(stderr, "Setup is %d bytes.\n", i);
+	memset(buf, 0, sizeof(buf));
+	while (i < setup_sectors * 512) {
+		c = setup_sectors * 512 - i;
+		if (c > sizeof(buf))
+			c = sizeof(buf);
+		if (write(1, buf, c) != c)
+			die("Write call failed");
+		i += c;
+	}
+
+	file_open(argv[3]);
+	if (fstat(fd, &sb))
+		die("Unable to stat `%s': %m", argv[3]);
+	sz = sb.st_size;
+	fprintf(stderr, "System is %d kB\n", sz / 1024);
+	sys_size = (sz + 15) / 16;
+	/* 0x28000*16 = 2.5 MB, conservative estimate for the current maximum */
+	if (sys_size > (is_big_kernel ? 0x28000 : DEF_SYSSIZE))
+		die("System is too big. Try using %smodules.",
+			is_big_kernel ? "" : "bzImage or ");
+	if (sys_size > 0xffff)
+		fprintf(stderr,
+			"warning: kernel is too big for standalone boot "
+			"from floppy\n");
+	while (sz > 0) {
+		int l, n;
+
+		l = (sz > sizeof(buf)) ? sizeof(buf) : sz;
+		n = read(fd, buf, l);
+		if (n != l) {
+			if (n < 0)
+				die("Error reading %s: %m", argv[3]);
+			else
+				die("%s: Unexpected EOF", argv[3]);
+		}
+		if (write(1, buf, l) != l)
+			die("Write failed");
+		sz -= l;
+	}
+	close(fd);
+
+	/* Write sizes to the bootsector */
+	if (lseek(1, 497, SEEK_SET) != 497)
+		die("Output: seek failed");
+	buf[0] = setup_sectors;
+	if (write(1, buf, 1) != 1)
+		die("Write of setup sector count failed");
+	if (lseek(1, 500, SEEK_SET) != 500)
+		die("Output: seek failed");
+	buf[0] = (sys_size & 0xff);
+	buf[1] = ((sys_size >> 8) & 0xff);
+	if (write(1, buf, 2) != 2)
+		die("Write of image length failed");
+
+	return 0;
+}
diff --git a/arch/mn10300/configs/asb2303_defconfig b/arch/mn10300/configs/asb2303_defconfig
new file mode 100644
index 000000000000..0189a058da9f
--- /dev/null
+++ b/arch/mn10300/configs/asb2303_defconfig
@@ -0,0 +1,555 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.24-rc2
+# Fri Nov 16 13:36:38 2007
+#
+CONFIG_MN10300=y
+CONFIG_AM33=y
+CONFIG_MMU=y
+# CONFIG_HIGHMEM is not set
+# CONFIG_NUMA is not set
+CONFIG_UID16=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_BUG=y
+CONFIG_QUICKLIST=y
+CONFIG_ARCH_HAS_ILOG2_U32=y
+# CONFIG_ARCH_SUPPORTS_AOUT is not set
+CONFIG_GENERIC_HARDIRQS=y
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+# CONFIG_FAIR_GROUP_SCHED is not set
+# CONFIG_SYSFS_DEPRECATED is not set
+# CONFIG_RELAY is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_MODULES is not set
+# CONFIG_BLOCK is not set
+
+#
+# Matsushita MN10300 system setup
+#
+CONFIG_MN10300_UNIT_ASB2303=y
+# CONFIG_MN10300_UNIT_ASB2305 is not set
+CONFIG_MN10300_PROC_MN103E010=y
+CONFIG_MN10300_CPU_AM33V2=y
+CONFIG_FPU=y
+CONFIG_MN10300_CACHE_WBACK=y
+# CONFIG_MN10300_CACHE_WTHRU is not set
+# CONFIG_MN10300_CACHE_DISABLED is not set
+
+#
+# Memory layout options
+#
+CONFIG_KERNEL_RAM_BASE_ADDRESS=0x90000000
+CONFIG_INTERRUPT_VECTOR_BASE=0x90000000
+CONFIG_KERNEL_TEXT_ADDRESS=0x90001000
+CONFIG_KERNEL_ZIMAGE_BASE_ADDRESS=0x90700000
+CONFIG_PREEMPT=y
+CONFIG_PREEMPT_BKL=y
+CONFIG_MN10300_CURRENT_IN_E2=y
+CONFIG_MN10300_USING_JTAG=y
+CONFIG_MN10300_RTC=y
+CONFIG_MN10300_WD_TIMER=y
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+
+#
+# MN10300 internal serial options
+#
+CONFIG_MN10300_PROC_HAS_TTYSM0=y
+CONFIG_MN10300_PROC_HAS_TTYSM1=y
+CONFIG_MN10300_PROC_HAS_TTYSM2=y
+CONFIG_MN10300_TTYSM=y
+CONFIG_MN10300_TTYSM_CONSOLE=y
+CONFIG_MN10300_TTYSM0=y
+CONFIG_MN10300_TTYSM0_TIMER8=y
+# CONFIG_MN10300_TTYSM0_TIMER2 is not set
+CONFIG_MN10300_TTYSM1=y
+CONFIG_MN10300_TTYSM1_TIMER9=y
+# CONFIG_MN10300_TTYSM1_TIMER3 is not set
+# CONFIG_MN10300_TTYSM2 is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=1
+CONFIG_VIRT_TO_BUS=y
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# Executable formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+# CONFIG_IP_PNP_DHCP is not set
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+CONFIG_MTD_DEBUG=y
+CONFIG_MTD_DEBUG_VERBOSE=0
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+CONFIG_MTD_REDBOOT_PARTS=y
+CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
+CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y
+# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_GEN_PROBE=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_NOSWAP=y
+# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set
+CONFIG_MTD_CFI_GEOMETRY=y
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+CONFIG_MTD_CFI_I4=y
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_OTP is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_EEPROM_93CX6 is not set
+
+#
+# SCSI device support
+#
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+CONFIG_SMC91X=y
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_RTC=y
+# CONFIG_R3964 is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_RTC_CLASS is not set
+
+#
+# Userspace I/O
+#
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
+# CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_FS_XATTR is not set
+# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
+CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
+CONFIG_JFFS2_RTIME=y
+# CONFIG_JFFS2_RUBIN is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_SUNRPC_BIND34 is not set
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SAMPLES is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+
+#
+# Profiling support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
diff --git a/arch/mn10300/kernel/Makefile b/arch/mn10300/kernel/Makefile
new file mode 100644
index 000000000000..ef07c956170a
--- /dev/null
+++ b/arch/mn10300/kernel/Makefile
@@ -0,0 +1,27 @@
+#
+# Makefile for the MN10300-specific core kernel code
+#
+extra-y := head.o init_task.o vmlinux.lds
+
+obj-y   := process.o semaphore.o signal.o entry.o fpu.o traps.o irq.o \
+	   ptrace.o setup.o time.o sys_mn10300.o io.o kthread.o \
+	   switch_to.o mn10300_ksyms.o kernel_execve.o
+
+obj-$(CONFIG_MN10300_WD_TIMER) += mn10300-watchdog.o mn10300-watchdog-low.o
+
+obj-$(CONFIG_FPU) += fpu-low.o
+
+obj-$(CONFIG_MN10300_TTYSM) += mn10300-serial.o mn10300-serial-low.o \
+			       mn10300-debug.o
+obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-low.o
+obj-$(CONFIG_GDBSTUB_ON_TTYSx) += gdb-io-serial.o gdb-io-serial-low.o
+obj-$(CONFIG_GDBSTUB_ON_TTYSMx) += gdb-io-ttysm.o gdb-io-ttysm-low.o
+
+ifneq ($(CONFIG_MN10300_CACHE_DISABLED),y)
+obj-$(CONFIG_GDBSTUB) += gdb-cache.o
+endif
+
+obj-$(CONFIG_MN10300_RTC) += rtc.o
+obj-$(CONFIG_PROFILE) += profile.o profile-low.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/arch/mn10300/kernel/asm-offsets.c b/arch/mn10300/kernel/asm-offsets.c
new file mode 100644
index 000000000000..ee2d9f8af5ad
--- /dev/null
+++ b/arch/mn10300/kernel/asm-offsets.c
@@ -0,0 +1,108 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/personality.h>
+#include <asm/ucontext.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/ptrace.h>
+#include "sigframe.h"
+#include "mn10300-serial.h"
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->")
+
+#define OFFSET(sym, str, mem) \
+	DEFINE(sym, offsetof(struct str, mem));
+
+void foo(void)
+{
+	OFFSET(SIGCONTEXT_d0, sigcontext, d0);
+	OFFSET(SIGCONTEXT_d1, sigcontext, d1);
+	BLANK();
+
+	OFFSET(TI_task,			thread_info, task);
+	OFFSET(TI_exec_domain,		thread_info, exec_domain);
+	OFFSET(TI_flags,		thread_info, flags);
+	OFFSET(TI_cpu,			thread_info, cpu);
+	OFFSET(TI_preempt_count,	thread_info, preempt_count);
+	OFFSET(TI_addr_limit,		thread_info, addr_limit);
+	OFFSET(TI_restart_block,	thread_info, restart_block);
+	BLANK();
+
+	OFFSET(REG_D0,			pt_regs, d0);
+	OFFSET(REG_D1,			pt_regs, d1);
+	OFFSET(REG_D2,			pt_regs, d2);
+	OFFSET(REG_D3,			pt_regs, d3);
+	OFFSET(REG_A0,			pt_regs, a0);
+	OFFSET(REG_A1,			pt_regs, a1);
+	OFFSET(REG_A2,			pt_regs, a2);
+	OFFSET(REG_A3,			pt_regs, a3);
+	OFFSET(REG_E0,			pt_regs, e0);
+	OFFSET(REG_E1,			pt_regs, e1);
+	OFFSET(REG_E2,			pt_regs, e2);
+	OFFSET(REG_E3,			pt_regs, e3);
+	OFFSET(REG_E4,			pt_regs, e4);
+	OFFSET(REG_E5,			pt_regs, e5);
+	OFFSET(REG_E6,			pt_regs, e6);
+	OFFSET(REG_E7,			pt_regs, e7);
+	OFFSET(REG_SP,			pt_regs, sp);
+	OFFSET(REG_EPSW,		pt_regs, epsw);
+	OFFSET(REG_PC,			pt_regs, pc);
+	OFFSET(REG_LAR,			pt_regs, lar);
+	OFFSET(REG_LIR,			pt_regs, lir);
+	OFFSET(REG_MDR,			pt_regs, mdr);
+	OFFSET(REG_MCVF,		pt_regs, mcvf);
+	OFFSET(REG_MCRL,		pt_regs, mcrl);
+	OFFSET(REG_MCRH,		pt_regs, mcrh);
+	OFFSET(REG_MDRQ,		pt_regs, mdrq);
+	OFFSET(REG_ORIG_D0,		pt_regs, orig_d0);
+	OFFSET(REG_NEXT,		pt_regs, next);
+	DEFINE(REG__END,		sizeof(struct pt_regs));
+	BLANK();
+
+	OFFSET(THREAD_UREGS,		thread_struct, uregs);
+	OFFSET(THREAD_PC,		thread_struct, pc);
+	OFFSET(THREAD_SP,		thread_struct, sp);
+	OFFSET(THREAD_A3,		thread_struct, a3);
+	OFFSET(THREAD_USP,		thread_struct, usp);
+	OFFSET(THREAD_FRAME,		thread_struct, __frame);
+	BLANK();
+
+	DEFINE(CLONE_VM_asm,		CLONE_VM);
+	DEFINE(CLONE_FS_asm,		CLONE_FS);
+	DEFINE(CLONE_FILES_asm,		CLONE_FILES);
+	DEFINE(CLONE_SIGHAND_asm,	CLONE_SIGHAND);
+	DEFINE(CLONE_UNTRACED_asm,	CLONE_UNTRACED);
+	DEFINE(SIGCHLD_asm,		SIGCHLD);
+	BLANK();
+
+	OFFSET(EXEC_DOMAIN_handler,	exec_domain, handler);
+	OFFSET(RT_SIGFRAME_sigcontext,	rt_sigframe, uc.uc_mcontext);
+
+	DEFINE(PAGE_SIZE_asm,		PAGE_SIZE);
+
+	OFFSET(__rx_buffer,		mn10300_serial_port, rx_buffer);
+	OFFSET(__rx_inp,		mn10300_serial_port, rx_inp);
+	OFFSET(__rx_outp,		mn10300_serial_port, rx_outp);
+	OFFSET(__tx_info_buffer,	mn10300_serial_port, uart.info);
+	OFFSET(__tx_xchar,		mn10300_serial_port, tx_xchar);
+	OFFSET(__tx_break,		mn10300_serial_port, tx_break);
+	OFFSET(__intr_flags,		mn10300_serial_port, intr_flags);
+	OFFSET(__rx_icr,		mn10300_serial_port, rx_icr);
+	OFFSET(__tx_icr,		mn10300_serial_port, tx_icr);
+	OFFSET(__tm_icr,		mn10300_serial_port, _tmicr);
+	OFFSET(__iobase,		mn10300_serial_port, _iobase);
+
+	DEFINE(__UART_XMIT_SIZE,	UART_XMIT_SIZE);
+	OFFSET(__xmit_buffer,		uart_info, xmit.buf);
+	OFFSET(__xmit_head,		uart_info, xmit.head);
+	OFFSET(__xmit_tail,		uart_info, xmit.tail);
+}
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
new file mode 100644
index 000000000000..11de3606eee6
--- /dev/null
+++ b/arch/mn10300/kernel/entry.S
@@ -0,0 +1,721 @@
+###############################################################################
+#
+# MN10300 Exception and interrupt entry points
+#
+# Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Modified by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/intctl-regs.h>
+#include <asm/busctl-regs.h>
+#include <asm/timer-regs.h>
+#include <asm/unit/leds.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/frame.inc>
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop		__cli
+#else
+#define preempt_stop
+#define resume_kernel		restore_all
+#endif
+
+	.macro __cli
+	and	~EPSW_IM,epsw
+	or	EPSW_IE|MN10300_CLI_LEVEL,epsw
+	nop
+	nop
+	nop
+	.endm
+	.macro __sti
+	or	EPSW_IE|EPSW_IM_7,epsw
+	.endm
+
+
+	.am33_2
+
+###############################################################################
+#
+# the return path for a forked child
+# - on entry, D0 holds the address of the previous task to run
+#
+###############################################################################
+ENTRY(ret_from_fork)
+	call	schedule_tail[],0
+	GET_THREAD_INFO a2
+
+	# return 0 to indicate child process
+	clr	d0
+	mov	d0,(REG_D0,fp)
+	jmp	syscall_exit
+
+###############################################################################
+#
+# system call handler
+#
+###############################################################################
+ENTRY(system_call)
+	add	-4,sp
+	SAVE_ALL
+	mov	d0,(REG_ORIG_D0,fp)
+	GET_THREAD_INFO a2
+	cmp	nr_syscalls,d0
+	bcc	syscall_badsys
+	btst	_TIF_SYSCALL_TRACE,(TI_flags,a2)
+	bne	syscall_trace_entry
+syscall_call:
+	add	d0,d0,a1
+	add	a1,a1
+	mov	(REG_A0,fp),d0
+	mov	(sys_call_table,a1),a0
+	calls	(a0)
+	mov	d0,(REG_D0,fp)
+syscall_exit:
+	# make sure we don't miss an interrupt setting need_resched or
+	# sigpending between sampling and the rti
+	__cli
+	mov	(TI_flags,a2),d2
+	btst	_TIF_ALLWORK_MASK,d2
+	bne	syscall_exit_work
+restore_all:
+	RESTORE_ALL
+
+###############################################################################
+#
+# perform work that needs to be done immediately before resumption and syscall
+# tracing
+#
+###############################################################################
+	ALIGN
+syscall_exit_work:
+	btst	_TIF_SYSCALL_TRACE,d2
+	beq	work_pending
+	__sti				# could let do_syscall_trace() call
+					# schedule() instead
+	mov	fp,d0
+	mov	1,d1
+	call	do_syscall_trace[],0	# do_syscall_trace(regs,entryexit)
+	jmp	resume_userspace
+
+	ALIGN
+work_pending:
+	btst	_TIF_NEED_RESCHED,d2
+	beq	work_notifysig
+
+work_resched:
+	call	schedule[],0
+
+	# make sure we don't miss an interrupt setting need_resched or
+	# sigpending between sampling and the rti
+	__cli
+
+	# is there any work to be done other than syscall tracing?
+	mov	(TI_flags,a2),d2
+	btst	_TIF_WORK_MASK,d2
+	beq	restore_all
+	btst	_TIF_NEED_RESCHED,d2
+	bne	work_resched
+
+	# deal with pending signals and notify-resume requests
+work_notifysig:
+	mov	fp,d0
+	mov	d2,d1
+	call	do_notify_resume[],0
+	jmp	resume_userspace
+
+	# perform syscall entry tracing
+syscall_trace_entry:
+	mov	-ENOSYS,d0
+	mov	d0,(REG_D0,fp)
+	mov	fp,d0
+	clr	d1
+	call	do_syscall_trace[],0
+	mov	(REG_ORIG_D0,fp),d0
+	mov	(REG_D1,fp),d1
+	cmp	nr_syscalls,d0
+	bcs	syscall_call
+	jmp	syscall_exit
+
+syscall_badsys:
+	mov	-ENOSYS,d0
+	mov	d0,(REG_D0,fp)
+	jmp	resume_userspace
+
+	# userspace resumption stub bypassing syscall exit tracing
+	.globl	ret_from_exception, ret_from_intr
+	ALIGN
+ret_from_exception:
+	preempt_stop
+ret_from_intr:
+	GET_THREAD_INFO a2
+	mov	(REG_EPSW,fp),d0	# need to deliver signals before
+					# returning to userspace
+	and	EPSW_nSL,d0
+	beq	resume_kernel		# returning to supervisor mode
+
+ENTRY(resume_userspace)
+	# make sure we don't miss an interrupt setting need_resched or
+	# sigpending between sampling and the rti
+	__cli
+
+	# is there any work to be done on int/exception return?
+	mov	(TI_flags,a2),d2
+	btst	_TIF_WORK_MASK,d2
+	bne	work_pending
+	jmp	restore_all
+
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+	mov	(TI_preempt_count,a2),d0	# non-zero preempt_count ?
+	cmp	0,d0
+	bne	restore_all
+
+need_resched:
+	btst	_TIF_NEED_RESCHED,(TI_flags,a2)
+	beq	restore_all
+	mov	(REG_EPSW,fp),d0
+	and	EPSW_IM,d0
+	cmp	EPSW_IM_7,d0		# interrupts off (exception path) ?
+	beq	restore_all
+	call	preempt_schedule_irq[],0
+	jmp	need_resched
+#endif
+
+
+###############################################################################
+#
+# IRQ handler entry point
+# - intended to be entered at multiple priorities
+#
+###############################################################################
+ENTRY(irq_handler)
+	add	-4,sp
+	SAVE_ALL
+
+	# it's not a syscall
+	mov	0xffffffff,d0
+	mov	d0,(REG_ORIG_D0,fp)
+
+	mov	fp,d0
+	call	do_IRQ[],0			# do_IRQ(regs)
+
+	jmp	ret_from_intr
+
+###############################################################################
+#
+# Monitor Signal handler entry point
+#
+###############################################################################
+ENTRY(monitor_signal)
+	movbu	(0xae000001),d1
+	cmp	1,d1
+	beq	monsignal
+	ret	[],0
+
+monsignal:
+	or	EPSW_NMID,epsw
+	mov	d0,a0
+	mov	a0,sp
+	mov	(REG_EPSW,fp),d1
+	and	~EPSW_nSL,d1
+	mov	d1,(REG_EPSW,fp)
+	movm	(sp),[d2,d3,a2,a3,exreg0,exreg1,exother]
+	mov	(sp),a1
+	mov	a1,usp
+	movm	(sp),[other]
+	add	4,sp
+here:	jmp	0x8e000008-here+0x8e000008
+
+###############################################################################
+#
+# Double Fault handler entry point
+# - note that there will not be a stack, D0/A0 will hold EPSW/PC as were
+#
+###############################################################################
+	.section .bss
+	.balign	THREAD_SIZE
+	.space	THREAD_SIZE
+__df_stack:
+	.previous
+
+ENTRY(double_fault)
+	mov	a0,(__df_stack-4)	# PC as was
+	mov	d0,(__df_stack-8)	# EPSW as was
+	mn10300_set_dbfleds		# display 'db-f' on the LEDs
+	mov	0xaa55aa55,d0
+	mov	d0,(__df_stack-12)	# no ORIG_D0
+	mov	sp,a0			# save corrupted SP
+	mov	__df_stack-12,sp	# emergency supervisor stack
+	SAVE_ALL
+	mov	a0,(REG_A0,fp)		# save corrupted SP as A0 (which got
+					# clobbered by the CPU)
+	mov	fp,d0
+	calls	do_double_fault
+double_fault_loop:
+	bra	double_fault_loop
+
+###############################################################################
+#
+# Bus Error handler entry point
+# - handle external (async) bus errors separately
+#
+###############################################################################
+ENTRY(raw_bus_error)
+	add	-4,sp
+	mov	d0,(sp)
+	mov	(BCBERR),d0		# what
+	btst	BCBERR_BEMR_DMA,d0	# see if it was an external bus error
+	beq	__common_exception_aux	# it wasn't
+
+	SAVE_ALL
+	mov	(BCBEAR),d1		# destination of erroneous access
+
+	mov	(REG_ORIG_D0,fp),d2
+	mov	d2,(REG_D0,fp)
+	mov	-1,d2
+	mov	d2,(REG_ORIG_D0,fp)
+
+	add	-4,sp
+	mov	fp,(12,sp)		# frame pointer
+	call	io_bus_error[],0
+	jmp	restore_all
+
+###############################################################################
+#
+# Miscellaneous exception entry points
+#
+###############################################################################
+ENTRY(nmi_handler)
+	add	-4,sp
+	mov	d0,(sp)
+	mov	(TBR),d0
+	bra	__common_exception_nonmi
+
+ENTRY(__common_exception)
+	add	-4,sp
+	mov	d0,(sp)
+
+__common_exception_aux:
+	mov	(TBR),d0
+	and	~EPSW_NMID,epsw		# turn NMIs back on if not NMI
+	or	EPSW_IE,epsw
+
+__common_exception_nonmi:
+	and	0x0000FFFF,d0		# turn the exception code into a vector
+					# table index
+
+	btst	0x00000007,d0
+	bne	1f
+	cmp	0x00000400,d0
+	bge	1f
+
+	SAVE_ALL			# build the stack frame
+
+	mov	(REG_D0,fp),a2		# get the exception number
+	mov	(REG_ORIG_D0,fp),d0
+	mov	d0,(REG_D0,fp)
+	mov	-1,d0
+	mov	d0,(REG_ORIG_D0,fp)
+
+#ifdef CONFIG_GDBSTUB
+	btst	0x01,(gdbstub_busy)
+	beq	2f
+	and	~EPSW_IE,epsw
+	mov	fp,d0
+	mov	a2,d1
+	call	gdbstub_exception[],0	# gdbstub itself caused an exception
+	bra	restore_all
+2:
+#endif
+
+	mov	fp,d0			# arg 0: stacked register file
+	mov	a2,d1			# arg 1: exception number
+	lsr	1,a2
+
+	mov	(exception_table,a2),a2
+	calls	(a2)
+	jmp	ret_from_exception
+
+1:	pi				# BUG() equivalent
+
+###############################################################################
+#
+# Exception handler functions table
+#
+###############################################################################
+	.data
+ENTRY(exception_table)
+	.rept	0x400>>1
+	 .long	uninitialised_exception
+	.endr
+	.previous
+
+###############################################################################
+#
+# Change an entry in the exception table
+# - D0 exception code, D1 handler
+#
+###############################################################################
+ENTRY(set_excp_vector)
+	lsr	1,d0
+	add	exception_table,d0
+	mov	d1,(d0)
+	mov	4,d1
+#if defined(CONFIG_MN10300_CACHE_WBACK)
+	jmp	mn10300_dcache_flush_inv_range2
+#else
+	ret	[],0
+#endif
+
+###############################################################################
+#
+# System call table
+#
+###############################################################################
+	.data
+ENTRY(sys_call_table)
+	.long sys_restart_syscall	/* 0 */
+	.long sys_exit
+	.long sys_fork
+	.long sys_read
+	.long sys_write
+	.long sys_open		/* 5 */
+	.long sys_close
+	.long sys_waitpid
+	.long sys_creat
+	.long sys_link
+	.long sys_unlink	/* 10 */
+	.long sys_execve
+	.long sys_chdir
+	.long sys_time
+	.long sys_mknod
+	.long sys_chmod		/* 15 */
+	.long sys_lchown16
+	.long sys_ni_syscall	/* old break syscall holder */
+	.long sys_stat
+	.long sys_lseek
+	.long sys_getpid	/* 20 */
+	.long sys_mount
+	.long sys_oldumount
+	.long sys_setuid16
+	.long sys_getuid16
+	.long sys_stime		/* 25 */
+	.long sys_ptrace
+	.long sys_alarm
+	.long sys_fstat
+	.long sys_pause
+	.long sys_utime		/* 30 */
+	.long sys_ni_syscall	/* old stty syscall holder */
+	.long sys_ni_syscall	/* old gtty syscall holder */
+	.long sys_access
+	.long sys_nice
+	.long sys_ni_syscall	/* 35 - old ftime syscall holder */
+	.long sys_sync
+	.long sys_kill
+	.long sys_rename
+	.long sys_mkdir
+	.long sys_rmdir		/* 40 */
+	.long sys_dup
+	.long sys_pipe
+	.long sys_times
+	.long sys_ni_syscall	/* old prof syscall holder */
+	.long sys_brk		/* 45 */
+	.long sys_setgid16
+	.long sys_getgid16
+	.long sys_signal
+	.long sys_geteuid16
+	.long sys_getegid16	/* 50 */
+	.long sys_acct
+	.long sys_umount	/* recycled never used phys() */
+	.long sys_ni_syscall	/* old lock syscall holder */
+	.long sys_ioctl
+	.long sys_fcntl		/* 55 */
+	.long sys_ni_syscall	/* old mpx syscall holder */
+	.long sys_setpgid
+	.long sys_ni_syscall	/* old ulimit syscall holder */
+	.long sys_ni_syscall	/* old sys_olduname */
+	.long sys_umask		/* 60 */
+	.long sys_chroot
+	.long sys_ustat
+	.long sys_dup2
+	.long sys_getppid
+	.long sys_getpgrp	/* 65 */
+	.long sys_setsid
+	.long sys_sigaction
+	.long sys_sgetmask
+	.long sys_ssetmask
+	.long sys_setreuid16	/* 70 */
+	.long sys_setregid16
+	.long sys_sigsuspend
+	.long sys_sigpending
+	.long sys_sethostname
+	.long sys_setrlimit	/* 75 */
+	.long sys_old_getrlimit
+	.long sys_getrusage
+	.long sys_gettimeofday
+	.long sys_settimeofday
+	.long sys_getgroups16	/* 80 */
+	.long sys_setgroups16
+	.long old_select
+	.long sys_symlink
+	.long sys_lstat
+	.long sys_readlink	/* 85 */
+	.long sys_uselib
+	.long sys_swapon
+	.long sys_reboot
+	.long old_readdir
+	.long old_mmap		/* 90 */
+	.long sys_munmap
+	.long sys_truncate
+	.long sys_ftruncate
+	.long sys_fchmod
+	.long sys_fchown16	/* 95 */
+	.long sys_getpriority
+	.long sys_setpriority
+	.long sys_ni_syscall	/* old profil syscall holder */
+	.long sys_statfs
+	.long sys_fstatfs	/* 100 */
+	.long sys_ni_syscall	/* ioperm */
+	.long sys_socketcall
+	.long sys_syslog
+	.long sys_setitimer
+	.long sys_getitimer	/* 105 */
+	.long sys_newstat
+	.long sys_newlstat
+	.long sys_newfstat
+	.long sys_ni_syscall	/* old sys_uname */
+	.long sys_ni_syscall	/* 110 - iopl */
+	.long sys_vhangup
+	.long sys_ni_syscall	/* old "idle" system call */
+	.long sys_ni_syscall	/* vm86old */
+	.long sys_wait4
+	.long sys_swapoff	/* 115 */
+	.long sys_sysinfo
+	.long sys_ipc
+	.long sys_fsync
+	.long sys_sigreturn
+	.long sys_clone		/* 120 */
+	.long sys_setdomainname
+	.long sys_newuname
+	.long sys_ni_syscall	/* modify_ldt */
+	.long sys_adjtimex
+	.long sys_mprotect	/* 125 */
+	.long sys_sigprocmask
+	.long sys_ni_syscall	/* old "create_module" */
+	.long sys_init_module
+	.long sys_delete_module
+	.long sys_ni_syscall	/* 130:	old "get_kernel_syms" */
+	.long sys_quotactl
+	.long sys_getpgid
+	.long sys_fchdir
+	.long sys_bdflush
+	.long sys_sysfs		/* 135 */
+	.long sys_personality
+	.long sys_ni_syscall	/* reserved for afs_syscall */
+	.long sys_setfsuid16
+	.long sys_setfsgid16
+	.long sys_llseek	/* 140 */
+	.long sys_getdents
+	.long sys_select
+	.long sys_flock
+	.long sys_msync
+	.long sys_readv		/* 145 */
+	.long sys_writev
+	.long sys_getsid
+	.long sys_fdatasync
+	.long sys_sysctl
+	.long sys_mlock		/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
+	.long sys_sched_setparam
+	.long sys_sched_getparam   /* 155 */
+	.long sys_sched_setscheduler
+	.long sys_sched_getscheduler
+	.long sys_sched_yield
+	.long sys_sched_get_priority_max
+	.long sys_sched_get_priority_min  /* 160 */
+	.long sys_sched_rr_get_interval
+	.long sys_nanosleep
+	.long sys_mremap
+	.long sys_setresuid16
+	.long sys_getresuid16	/* 165 */
+	.long sys_ni_syscall	/* vm86 */
+	.long sys_ni_syscall	/* Old sys_query_module */
+	.long sys_poll
+	.long sys_nfsservctl
+	.long sys_setresgid16	/* 170 */
+	.long sys_getresgid16
+	.long sys_prctl
+	.long sys_rt_sigreturn
+	.long sys_rt_sigaction
+	.long sys_rt_sigprocmask	/* 175 */
+	.long sys_rt_sigpending
+	.long sys_rt_sigtimedwait
+	.long sys_rt_sigqueueinfo
+	.long sys_rt_sigsuspend
+	.long sys_pread64	/* 180 */
+	.long sys_pwrite64
+	.long sys_chown16
+	.long sys_getcwd
+	.long sys_capget
+	.long sys_capset	/* 185 */
+	.long sys_sigaltstack
+	.long sys_sendfile
+	.long sys_ni_syscall	/* reserved for streams1 */
+	.long sys_ni_syscall	/* reserved for streams2 */
+	.long sys_vfork		/* 190 */
+	.long sys_getrlimit
+	.long sys_mmap2
+	.long sys_truncate64
+	.long sys_ftruncate64
+	.long sys_stat64	/* 195 */
+	.long sys_lstat64
+	.long sys_fstat64
+	.long sys_lchown
+	.long sys_getuid
+	.long sys_getgid	/* 200 */
+	.long sys_geteuid
+	.long sys_getegid
+	.long sys_setreuid
+	.long sys_setregid
+	.long sys_getgroups	/* 205 */
+	.long sys_setgroups
+	.long sys_fchown
+	.long sys_setresuid
+	.long sys_getresuid
+	.long sys_setresgid	/* 210 */
+	.long sys_getresgid
+	.long sys_chown
+	.long sys_setuid
+	.long sys_setgid
+	.long sys_setfsuid	/* 215 */
+	.long sys_setfsgid
+	.long sys_pivot_root
+	.long sys_mincore
+	.long sys_madvise
+	.long sys_getdents64	/* 220 */
+	.long sys_fcntl64
+	.long sys_ni_syscall	/* reserved for TUX */
+	.long sys_ni_syscall
+	.long sys_gettid
+	.long sys_readahead	/* 225 */
+	.long sys_setxattr
+	.long sys_lsetxattr
+	.long sys_fsetxattr
+	.long sys_getxattr
+	.long sys_lgetxattr	/* 230 */
+	.long sys_fgetxattr
+	.long sys_listxattr
+	.long sys_llistxattr
+	.long sys_flistxattr
+	.long sys_removexattr	/* 235 */
+	.long sys_lremovexattr
+	.long sys_fremovexattr
+	.long sys_tkill
+	.long sys_sendfile64
+	.long sys_futex		/* 240 */
+	.long sys_sched_setaffinity
+	.long sys_sched_getaffinity
+	.long sys_ni_syscall	/* sys_set_thread_area */
+	.long sys_ni_syscall	/* sys_get_thread_area */
+	.long sys_io_setup	/* 245 */
+	.long sys_io_destroy
+	.long sys_io_getevents
+	.long sys_io_submit
+	.long sys_io_cancel
+	.long sys_fadvise64	/* 250 */
+	.long sys_ni_syscall
+	.long sys_exit_group
+	.long sys_lookup_dcookie
+	.long sys_epoll_create
+	.long sys_epoll_ctl	/* 255 */
+	.long sys_epoll_wait
+ 	.long sys_remap_file_pages
+ 	.long sys_set_tid_address
+ 	.long sys_timer_create
+ 	.long sys_timer_settime		/* 260 */
+ 	.long sys_timer_gettime
+ 	.long sys_timer_getoverrun
+ 	.long sys_timer_delete
+ 	.long sys_clock_settime
+ 	.long sys_clock_gettime		/* 265 */
+ 	.long sys_clock_getres
+ 	.long sys_clock_nanosleep
+	.long sys_statfs64
+	.long sys_fstatfs64
+	.long sys_tgkill		/* 270 */
+	.long sys_utimes
+ 	.long sys_fadvise64_64
+	.long sys_ni_syscall	/* sys_vserver */
+	.long sys_mbind
+	.long sys_get_mempolicy		/* 275 */
+	.long sys_set_mempolicy
+	.long sys_mq_open
+	.long sys_mq_unlink
+	.long sys_mq_timedsend
+	.long sys_mq_timedreceive	/* 280 */
+	.long sys_mq_notify
+	.long sys_mq_getsetattr
+	.long sys_kexec_load
+	.long sys_waitid
+	.long sys_ni_syscall		/* 285 */ /* available */
+	.long sys_add_key
+	.long sys_request_key
+	.long sys_keyctl
+	.long sys_cacheflush
+	.long sys_ioprio_set		/* 290 */
+	.long sys_ioprio_get
+	.long sys_inotify_init
+	.long sys_inotify_add_watch
+	.long sys_inotify_rm_watch
+	.long sys_migrate_pages		/* 295 */
+	.long sys_openat
+	.long sys_mkdirat
+	.long sys_mknodat
+	.long sys_fchownat
+	.long sys_futimesat		/* 300 */
+	.long sys_fstatat64
+	.long sys_unlinkat
+	.long sys_renameat
+	.long sys_linkat
+	.long sys_symlinkat		/* 305 */
+	.long sys_readlinkat
+	.long sys_fchmodat
+	.long sys_faccessat
+	.long sys_pselect6
+	.long sys_ppoll			/* 310 */
+	.long sys_unshare
+	.long sys_set_robust_list
+	.long sys_get_robust_list
+	.long sys_splice
+	.long sys_sync_file_range	/* 315 */
+	.long sys_tee
+	.long sys_vmsplice
+	.long sys_move_pages
+	.long sys_getcpu
+	.long sys_epoll_pwait		/* 320 */
+	.long sys_utimensat
+	.long sys_signalfd
+	.long sys_timerfd_create
+	.long sys_eventfd
+	.long sys_fallocate		/* 325 */
+	.long sys_timerfd_settime
+	.long sys_timerfd_gettime
+
+
+nr_syscalls=(.-sys_call_table)/4
diff --git a/arch/mn10300/kernel/fpu-low.S b/arch/mn10300/kernel/fpu-low.S
new file mode 100644
index 000000000000..96cfd47e68d5
--- /dev/null
+++ b/arch/mn10300/kernel/fpu-low.S
@@ -0,0 +1,197 @@
+/* MN10300 Low level FPU management operations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/cpu-regs.h>
+
+###############################################################################
+#
+# void fpu_init_state(void)
+# - initialise the FPU
+#
+###############################################################################
+	.globl	fpu_init_state
+	.type	fpu_init_state,@function
+fpu_init_state:
+	mov	epsw,d0
+	or	EPSW_FE,epsw
+
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	nop
+	nop
+	nop
+#endif
+	fmov	0,fs0
+	fmov	fs0,fs1
+	fmov	fs0,fs2
+	fmov	fs0,fs3
+	fmov	fs0,fs4
+	fmov	fs0,fs5
+	fmov	fs0,fs6
+	fmov	fs0,fs7
+	fmov	fs0,fs8
+	fmov	fs0,fs9
+	fmov	fs0,fs10
+	fmov	fs0,fs11
+	fmov	fs0,fs12
+	fmov	fs0,fs13
+	fmov	fs0,fs14
+	fmov	fs0,fs15
+	fmov	fs0,fs16
+	fmov	fs0,fs17
+	fmov	fs0,fs18
+	fmov	fs0,fs19
+	fmov	fs0,fs20
+	fmov	fs0,fs21
+	fmov	fs0,fs22
+	fmov	fs0,fs23
+	fmov	fs0,fs24
+	fmov	fs0,fs25
+	fmov	fs0,fs26
+	fmov	fs0,fs27
+	fmov	fs0,fs28
+	fmov	fs0,fs29
+	fmov	fs0,fs30
+	fmov	fs0,fs31
+	fmov	FPCR_INIT,fpcr
+
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	nop
+	nop
+	nop
+#endif
+	mov	d0,epsw
+	ret	[],0
+
+	.size	fpu_init_state,.-fpu_init_state
+
+###############################################################################
+#
+# void fpu_save(struct fpu_state_struct *)
+# - save the fpu state
+# - note that an FPU Operational exception might occur during this process
+#
+###############################################################################
+	.globl	fpu_save
+	.type	fpu_save,@function
+fpu_save:
+	mov	epsw,d1
+	or	EPSW_FE,epsw		/* enable the FPU so we can access it */
+
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	nop
+	nop
+#endif
+	mov	d0,a0
+	fmov	fs0,(a0+)
+	fmov	fs1,(a0+)
+	fmov	fs2,(a0+)
+	fmov	fs3,(a0+)
+	fmov	fs4,(a0+)
+	fmov	fs5,(a0+)
+	fmov	fs6,(a0+)
+	fmov	fs7,(a0+)
+	fmov	fs8,(a0+)
+	fmov	fs9,(a0+)
+	fmov	fs10,(a0+)
+	fmov	fs11,(a0+)
+	fmov	fs12,(a0+)
+	fmov	fs13,(a0+)
+	fmov	fs14,(a0+)
+	fmov	fs15,(a0+)
+	fmov	fs16,(a0+)
+	fmov	fs17,(a0+)
+	fmov	fs18,(a0+)
+	fmov	fs19,(a0+)
+	fmov	fs20,(a0+)
+	fmov	fs21,(a0+)
+	fmov	fs22,(a0+)
+	fmov	fs23,(a0+)
+	fmov	fs24,(a0+)
+	fmov	fs25,(a0+)
+	fmov	fs26,(a0+)
+	fmov	fs27,(a0+)
+	fmov	fs28,(a0+)
+	fmov	fs29,(a0+)
+	fmov	fs30,(a0+)
+	fmov	fs31,(a0+)
+	fmov	fpcr,d0
+	mov	d0,(a0)
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	nop
+	nop
+#endif
+
+	mov	d1,epsw
+	ret	[],0
+
+	.size	fpu_save,.-fpu_save
+
+###############################################################################
+#
+# void fpu_restore(struct fpu_state_struct *)
+# - restore the fpu state
+# - note that an FPU Operational exception might occur during this process
+#
+###############################################################################
+	.globl	fpu_restore
+	.type	fpu_restore,@function
+fpu_restore:
+	mov	epsw,d1
+	or	EPSW_FE,epsw		/* enable the FPU so we can access it */
+
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	nop
+	nop
+#endif
+	mov	d0,a0
+	fmov	(a0+),fs0
+	fmov	(a0+),fs1
+	fmov	(a0+),fs2
+	fmov	(a0+),fs3
+	fmov	(a0+),fs4
+	fmov	(a0+),fs5
+	fmov	(a0+),fs6
+	fmov	(a0+),fs7
+	fmov	(a0+),fs8
+	fmov	(a0+),fs9
+	fmov	(a0+),fs10
+	fmov	(a0+),fs11
+	fmov	(a0+),fs12
+	fmov	(a0+),fs13
+	fmov	(a0+),fs14
+	fmov	(a0+),fs15
+	fmov	(a0+),fs16
+	fmov	(a0+),fs17
+	fmov	(a0+),fs18
+	fmov	(a0+),fs19
+	fmov	(a0+),fs20
+	fmov	(a0+),fs21
+	fmov	(a0+),fs22
+	fmov	(a0+),fs23
+	fmov	(a0+),fs24
+	fmov	(a0+),fs25
+	fmov	(a0+),fs26
+	fmov	(a0+),fs27
+	fmov	(a0+),fs28
+	fmov	(a0+),fs29
+	fmov	(a0+),fs30
+	fmov	(a0+),fs31
+	mov	(a0),d0
+	fmov	d0,fpcr
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	nop
+	nop
+	nop
+#endif
+
+	mov	d1,epsw
+	ret	[],0
+
+	.size	fpu_restore,.-fpu_restore
diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c
new file mode 100644
index 000000000000..e705f25ad5ff
--- /dev/null
+++ b/arch/mn10300/kernel/fpu.c
@@ -0,0 +1,223 @@
+/* MN10300 FPU management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/uaccess.h>
+#include <asm/fpu.h>
+#include <asm/elf.h>
+#include <asm/exceptions.h>
+
+struct task_struct *fpu_state_owner;
+
+/*
+ * handle an exception due to the FPU being disabled
+ */
+asmlinkage void fpu_disabled(struct pt_regs *regs, enum exception_code code)
+{
+	struct task_struct *tsk = current;
+
+	if (!user_mode(regs))
+		die_if_no_fixup("An FPU Disabled exception happened in"
+				" kernel space\n",
+				regs, code);
+
+#ifdef CONFIG_FPU
+	preempt_disable();
+
+	/* transfer the last process's FPU state to memory */
+	if (fpu_state_owner) {
+		fpu_save(&fpu_state_owner->thread.fpu_state);
+		fpu_state_owner->thread.uregs->epsw &= ~EPSW_FE;
+	}
+
+	/* the current process now owns the FPU state */
+	fpu_state_owner = tsk;
+	regs->epsw |= EPSW_FE;
+
+	/* load the FPU with the current process's FPU state or invent a new
+	 * clean one if the process doesn't have one */
+	if (is_using_fpu(tsk)) {
+		fpu_restore(&tsk->thread.fpu_state);
+	} else {
+		fpu_init_state();
+		set_using_fpu(tsk);
+	}
+
+	preempt_enable();
+#else
+	{
+		siginfo_t info;
+
+		info.si_signo = SIGFPE;
+		info.si_errno = 0;
+		info.si_addr = (void *) tsk->thread.uregs->pc;
+		info.si_code = FPE_FLTINV;
+
+		force_sig_info(SIGFPE, &info, tsk);
+	}
+#endif  /* CONFIG_FPU */
+}
+
+/*
+ * handle an FPU operational exception
+ * - there's a possibility that if the FPU is asynchronous, the signal might
+ *   be meant for a process other than the current one
+ */
+asmlinkage void fpu_exception(struct pt_regs *regs, enum exception_code code)
+{
+	struct task_struct *tsk = fpu_state_owner;
+	siginfo_t info;
+
+	if (!user_mode(regs))
+		die_if_no_fixup("An FPU Operation exception happened in"
+				" kernel space\n",
+				regs, code);
+
+	if (!tsk)
+		die_if_no_fixup("An FPU Operation exception happened,"
+				" but the FPU is not in use",
+				regs, code);
+
+	info.si_signo = SIGFPE;
+	info.si_errno = 0;
+	info.si_addr = (void *) tsk->thread.uregs->pc;
+	info.si_code = FPE_FLTINV;
+
+#ifdef CONFIG_FPU
+	{
+		u32 fpcr;
+
+		/* get FPCR (we need to enable the FPU whilst we do this) */
+		asm volatile("	or	%1,epsw		\n"
+#ifdef CONFIG_MN10300_PROC_MN103E010
+			     "	nop			\n"
+			     "	nop			\n"
+			     "	nop			\n"
+#endif
+			     "	fmov	fpcr,%0		\n"
+#ifdef CONFIG_MN10300_PROC_MN103E010
+			     "	nop			\n"
+			     "	nop			\n"
+			     "	nop			\n"
+#endif
+			     "	and	%2,epsw		\n"
+			     : "=&d"(fpcr)
+			     : "i"(EPSW_FE), "i"(~EPSW_FE)
+			     );
+
+		if (fpcr & FPCR_EC_Z)
+			info.si_code = FPE_FLTDIV;
+		else if	(fpcr & FPCR_EC_O)
+			info.si_code = FPE_FLTOVF;
+		else if	(fpcr & FPCR_EC_U)
+			info.si_code = FPE_FLTUND;
+		else if	(fpcr & FPCR_EC_I)
+			info.si_code = FPE_FLTRES;
+	}
+#endif
+
+	force_sig_info(SIGFPE, &info, tsk);
+}
+
+/*
+ * save the FPU state to a signal context
+ */
+int fpu_setup_sigcontext(struct fpucontext *fpucontext)
+{
+#ifdef CONFIG_FPU
+	struct task_struct *tsk = current;
+
+	if (!is_using_fpu(tsk))
+		return 0;
+
+	/* transfer the current FPU state to memory and cause fpu_init() to be
+	 * triggered by the next attempted FPU operation by the current
+	 * process.
+	 */
+	preempt_disable();
+
+	if (fpu_state_owner == tsk) {
+		fpu_save(&tsk->thread.fpu_state);
+		fpu_state_owner->thread.uregs->epsw &= ~EPSW_FE;
+		fpu_state_owner = NULL;
+	}
+
+	preempt_enable();
+
+	/* we no longer have a valid current FPU state */
+	clear_using_fpu(tsk);
+
+	/* transfer the saved FPU state onto the userspace stack */
+	if (copy_to_user(fpucontext,
+			 &tsk->thread.fpu_state,
+			 min(sizeof(struct fpu_state_struct),
+			     sizeof(struct fpucontext))))
+		return -1;
+
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * kill a process's FPU state during restoration after signal handling
+ */
+void fpu_kill_state(struct task_struct *tsk)
+{
+#ifdef CONFIG_FPU
+	/* disown anything left in the FPU */
+	preempt_disable();
+
+	if (fpu_state_owner == tsk) {
+		fpu_state_owner->thread.uregs->epsw &= ~EPSW_FE;
+		fpu_state_owner = NULL;
+	}
+
+	preempt_enable();
+#endif
+	/* we no longer have a valid current FPU state */
+	clear_using_fpu(tsk);
+}
+
+/*
+ * restore the FPU state from a signal context
+ */
+int fpu_restore_sigcontext(struct fpucontext *fpucontext)
+{
+	struct task_struct *tsk = current;
+	int ret;
+
+	/* load up the old FPU state */
+	ret = copy_from_user(&tsk->thread.fpu_state,
+			     fpucontext,
+			     min(sizeof(struct fpu_state_struct),
+				 sizeof(struct fpucontext)));
+	if (!ret)
+		set_using_fpu(tsk);
+
+	return ret;
+}
+
+/*
+ * fill in the FPU structure for a core dump
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpreg)
+{
+	struct task_struct *tsk = current;
+	int fpvalid;
+
+	fpvalid = is_using_fpu(tsk);
+	if (fpvalid) {
+		unlazy_fpu(tsk);
+		memcpy(fpreg, &tsk->thread.fpu_state, sizeof(*fpreg));
+	}
+
+	return fpvalid;
+}
diff --git a/arch/mn10300/kernel/gdb-cache.S b/arch/mn10300/kernel/gdb-cache.S
new file mode 100644
index 000000000000..1108badc3d32
--- /dev/null
+++ b/arch/mn10300/kernel/gdb-cache.S
@@ -0,0 +1,105 @@
+###############################################################################
+#
+# MN10300 Low-level cache purging routines for gdbstub
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/cpu-regs.h>
+#include <asm/exceptions.h>
+#include <asm/frame.inc>
+#include <asm/serial-regs.h>
+
+	.text
+
+###############################################################################
+#
+# GDB stub cache purge
+#
+###############################################################################
+	.type	gdbstub_purge_cache,@function
+ENTRY(gdbstub_purge_cache)
+	#######################################################################
+	# read the addresses tagged in the cache's tag RAM and attempt to flush
+	# those addresses specifically
+	# - we rely on the hardware to filter out invalid tag entry addresses
+	mov	DCACHE_TAG(0,0),a0		# dcache tag RAM access address
+	mov	DCACHE_PURGE(0,0),a1		# dcache purge request address
+	mov	L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1  # total number of entries
+
+mn10300_dcache_flush_loop:
+	mov	(a0),d0
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
+	or	L1_CACHE_TAG_VALID,d0		# retain valid entries in the
+						# cache
+	mov	d0,(a1)				# conditional purge
+
+mn10300_dcache_flush_skip:
+	add	L1_CACHE_BYTES,a0
+	add	L1_CACHE_BYTES,a1
+	add	-1,d1
+	bne	mn10300_dcache_flush_loop
+
+;; 	# unconditionally flush and invalidate the dcache
+;; 	mov	DCACHE_PURGE(0,0),a1		# dcache purge request address
+;; 	mov	L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1	# total number of
+;;							# entries
+;;
+;; gdbstub_purge_cache__dcache_loop:
+;; 	mov	(a1),d0				# unconditional purge
+;;
+;; 	add	L1_CACHE_BYTES,a1
+;; 	add	-1,d1
+;; 	bne	gdbstub_purge_cache__dcache_loop
+
+	#######################################################################
+	# now invalidate the icache
+	mov	CHCTR,a0
+	movhu	(a0),a1
+
+	mov	epsw,d1
+	and	~EPSW_IE,epsw
+	nop
+	nop
+
+	# disable the icache
+	and	~CHCTR_ICEN,d0
+	movhu	d0,(a0)
+
+	# and wait for it to calm down
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# invalidate
+	or	CHCTR_ICINV,d0
+	movhu	d0,(a0)
+
+	# wait for the cache to finish
+	mov	CHCTR,a0
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# and reenable it
+	movhu	a1,(a0)
+	movhu	(a0),d0			# read back to flush
+					# (SIGILLs all over without this)
+
+	mov	d1,epsw
+
+	ret	[],0
+
+	.size	gdbstub_purge_cache,.-gdbstub_purge_cache
diff --git a/arch/mn10300/kernel/gdb-io-serial-low.S b/arch/mn10300/kernel/gdb-io-serial-low.S
new file mode 100644
index 000000000000..c68dcd052201
--- /dev/null
+++ b/arch/mn10300/kernel/gdb-io-serial-low.S
@@ -0,0 +1,90 @@
+###############################################################################
+#
+# 16550 serial Rx interrupt handler for gdbstub I/O
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/cpu-regs.h>
+#include <asm/thread_info.h>
+#include <asm/frame.inc>
+#include <asm/intctl-regs.h>
+#include <asm/unit/serial.h>
+
+	.text
+
+###############################################################################
+#
+# GDB stub serial receive interrupt entry point
+# - intended to run at interrupt priority 0
+#
+###############################################################################
+	.globl	gdbstub_io_rx_handler
+	.type	gdbstub_io_rx_handler,@function
+gdbstub_io_rx_handler:
+	movm	[d2,d3,a2,a3],(sp)
+
+#if 1
+	movbu	(GDBPORT_SERIAL_IIR),d2
+#endif
+
+	mov	(gdbstub_rx_inp),a3
+gdbstub_io_rx_more:
+	mov	a3,a2
+	add	2,a3
+	and	0x00000fff,a3
+	mov	(gdbstub_rx_outp),d3
+	cmp	a3,d3
+	beq	gdbstub_io_rx_overflow
+
+	movbu	(GDBPORT_SERIAL_LSR),d3
+	btst	UART_LSR_DR,d3
+	beq	gdbstub_io_rx_done
+	movbu	(GDBPORT_SERIAL_RX),d2
+	movbu	d3,(gdbstub_rx_buffer+1,a2)
+	movbu	d2,(gdbstub_rx_buffer,a2)
+	mov	a3,(gdbstub_rx_inp)
+	bra	gdbstub_io_rx_more
+
+gdbstub_io_rx_done:
+	mov	GxICR_DETECT,d2
+	movbu	d2,(XIRQxICR(GDBPORT_SERIAL_IRQ))	# ACK the interrupt
+	movhu	(XIRQxICR(GDBPORT_SERIAL_IRQ)),d2	# flush
+	movm	(sp),[d2,d3,a2,a3]
+	bset	0x01,(gdbstub_busy)
+	beq	gdbstub_io_rx_enter
+	rti
+
+gdbstub_io_rx_overflow:
+	bset	0x01,(gdbstub_rx_overflow)
+	bra	gdbstub_io_rx_done
+
+gdbstub_io_rx_enter:
+	or	EPSW_IE|EPSW_IM_1,epsw
+	add	-4,sp
+	SAVE_ALL
+
+	mov	0xffffffff,d0
+	mov	d0,(REG_ORIG_D0,fp)
+	mov	0x280,d1
+
+	mov	fp,d0
+	call	gdbstub_rx_irq[],0	# gdbstub_rx_irq(regs,excep)
+
+	and	~EPSW_IE,epsw
+	bclr	0x01,(gdbstub_busy)
+
+	.globl gdbstub_return
+gdbstub_return:
+	RESTORE_ALL
+
+	.size	gdbstub_io_rx_handler,.-gdbstub_io_rx_handler
diff --git a/arch/mn10300/kernel/gdb-io-serial.c b/arch/mn10300/kernel/gdb-io-serial.c
new file mode 100644
index 000000000000..9a6d4e8ebe73
--- /dev/null
+++ b/arch/mn10300/kernel/gdb-io-serial.c
@@ -0,0 +1,155 @@
+/* 16550 serial driver for gdbstub I/O
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <linux/nmi.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/gdb-stub.h>
+#include <asm/exceptions.h>
+#include <asm/serial-regs.h>
+#include <asm/unit/serial.h>
+
+/*
+ * initialise the GDB stub
+ */
+void gdbstub_io_init(void)
+{
+	u16 tmp;
+
+	/* set up the serial port */
+	GDBPORT_SERIAL_LCR = UART_LCR_WLEN8; /* 1N8 */
+	GDBPORT_SERIAL_FCR = (UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR |
+			      UART_FCR_CLEAR_XMIT);
+
+	FLOWCTL_CLEAR(DTR);
+	FLOWCTL_SET(RTS);
+
+	gdbstub_io_set_baud(115200);
+
+	/* we want to get serial receive interrupts */
+	XIRQxICR(GDBPORT_SERIAL_IRQ) = 0;
+	tmp = XIRQxICR(GDBPORT_SERIAL_IRQ);
+
+	IVAR0 = EXCEP_IRQ_LEVEL0;
+	set_intr_stub(EXCEP_IRQ_LEVEL0, gdbstub_io_rx_handler);
+
+	XIRQxICR(GDBPORT_SERIAL_IRQ) &= ~GxICR_REQUEST;
+	XIRQxICR(GDBPORT_SERIAL_IRQ) = GxICR_ENABLE | GxICR_LEVEL_0;
+	tmp = XIRQxICR(GDBPORT_SERIAL_IRQ);
+
+	GDBPORT_SERIAL_IER = UART_IER_RDI | UART_IER_RLSI;
+
+	/* permit level 0 IRQs to take place */
+	asm volatile(
+		"	and %0,epsw	\n"
+		"	or %1,epsw	\n"
+		:
+		: "i"(~EPSW_IM), "i"(EPSW_IE | EPSW_IM_1)
+		);
+}
+
+/*
+ * set up the GDB stub serial port baud rate timers
+ */
+void gdbstub_io_set_baud(unsigned baud)
+{
+	unsigned value;
+	u8 lcr;
+
+	value = 18432000 / 16 / baud;
+
+	lcr = GDBPORT_SERIAL_LCR;
+	GDBPORT_SERIAL_LCR |= UART_LCR_DLAB;
+	GDBPORT_SERIAL_DLL = value & 0xff;
+	GDBPORT_SERIAL_DLM = (value >> 8) & 0xff;
+	GDBPORT_SERIAL_LCR = lcr;
+}
+
+/*
+ * wait for a character to come from the debugger
+ */
+int gdbstub_io_rx_char(unsigned char *_ch, int nonblock)
+{
+	unsigned ix;
+	u8 ch, st;
+
+	*_ch = 0xff;
+
+	if (gdbstub_rx_unget) {
+		*_ch = gdbstub_rx_unget;
+		gdbstub_rx_unget = 0;
+		return 0;
+	}
+
+ try_again:
+	/* pull chars out of the buffer */
+	ix = gdbstub_rx_outp;
+	if (ix == gdbstub_rx_inp) {
+		if (nonblock)
+			return -EAGAIN;
+#ifdef CONFIG_MN10300_WD_TIMER
+		watchdog_alert_counter = 0;
+#endif /* CONFIG_MN10300_WD_TIMER */
+		goto try_again;
+	}
+
+	ch = gdbstub_rx_buffer[ix++];
+	st = gdbstub_rx_buffer[ix++];
+	gdbstub_rx_outp = ix & 0x00000fff;
+
+	if (st & UART_LSR_BI) {
+		gdbstub_proto("### GDB Rx Break Detected ###\n");
+		return -EINTR;
+	} else if (st & (UART_LSR_FE | UART_LSR_OE | UART_LSR_PE)) {
+		gdbstub_proto("### GDB Rx Error (st=%02x) ###\n", st);
+		return -EIO;
+	} else {
+		gdbstub_proto("### GDB Rx %02x (st=%02x) ###\n", ch, st);
+		*_ch = ch & 0x7f;
+		return 0;
+	}
+}
+
+/*
+ * send a character to the debugger
+ */
+void gdbstub_io_tx_char(unsigned char ch)
+{
+	FLOWCTL_SET(DTR);
+	LSR_WAIT_FOR(THRE);
+	/* FLOWCTL_WAIT_FOR(CTS); */
+
+	if (ch == 0x0a) {
+		GDBPORT_SERIAL_TX = 0x0d;
+		LSR_WAIT_FOR(THRE);
+		/* FLOWCTL_WAIT_FOR(CTS); */
+	}
+	GDBPORT_SERIAL_TX = ch;
+
+	FLOWCTL_CLEAR(DTR);
+}
+
+/*
+ * send a character to the debugger
+ */
+void gdbstub_io_tx_flush(void)
+{
+	LSR_WAIT_FOR(TEMT);
+	LSR_WAIT_FOR(THRE);
+	FLOWCTL_CLEAR(DTR);
+}
diff --git a/arch/mn10300/kernel/gdb-io-ttysm-low.S b/arch/mn10300/kernel/gdb-io-ttysm-low.S
new file mode 100644
index 000000000000..677c7876307c
--- /dev/null
+++ b/arch/mn10300/kernel/gdb-io-ttysm-low.S
@@ -0,0 +1,93 @@
+###############################################################################
+#
+# MN10300 On-chip serial Rx interrupt handler for GDB stub I/O
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/thread_info.h>
+#include <asm/cpu-regs.h>
+#include <asm/frame.inc>
+#include <asm/intctl-regs.h>
+#include <asm/unit/serial.h>
+#include "mn10300-serial.h"
+
+	.text
+
+###############################################################################
+#
+# GDB stub serial receive interrupt entry point
+# - intended to run at interrupt priority 0
+#
+###############################################################################
+	.globl	gdbstub_io_rx_handler
+	.type	gdbstub_io_rx_handler,@function
+gdbstub_io_rx_handler:
+	movm	[d2,d3,a2,a3],(sp)
+
+	mov	(gdbstub_rx_inp),a3
+gdbstub_io_rx_more:
+	mov	a3,a2
+	add	2,a3
+	and	PAGE_SIZE_asm-1,a3
+	mov	(gdbstub_rx_outp),d3
+	cmp	a3,d3
+	beq	gdbstub_io_rx_overflow
+
+	movbu	(SCgSTR),d3
+	btst	SC01STR_RBF,d3
+	beq	gdbstub_io_rx_done
+	movbu	(SCgRXB),d2
+	movbu	d3,(gdbstub_rx_buffer+1,a2)
+	movbu	d2,(gdbstub_rx_buffer,a2)
+	mov	a3,(gdbstub_rx_inp)
+	bra	gdbstub_io_rx_more
+
+gdbstub_io_rx_done:
+	mov	GxICR_DETECT,d2
+	movbu	d2,(GxICR(SCgRXIRQ))	# ACK the interrupt
+	movhu	(GxICR(SCgRXIRQ)),d2	# flush
+
+	movm	(sp),[d2,d3,a2,a3]
+	bset	0x01,(gdbstub_busy)
+	beq	gdbstub_io_rx_enter
+	rti
+
+gdbstub_io_rx_overflow:
+	bset	0x01,(gdbstub_rx_overflow)
+	bra	gdbstub_io_rx_done
+
+###############################################################################
+#
+# debugging interrupt - enter the GDB stub proper
+#
+###############################################################################
+gdbstub_io_rx_enter:
+	or	EPSW_IE|EPSW_IM_1,epsw
+	add	-4,sp
+	SAVE_ALL
+
+	mov	0xffffffff,d0
+	mov	d0,(REG_ORIG_D0,fp)
+	mov	0x280,d1
+
+	mov	fp,d0
+	call	gdbstub_rx_irq[],0	# gdbstub_io_rx_irq(regs,excep)
+
+	and	~EPSW_IE,epsw
+	bclr	0x01,(gdbstub_busy)
+
+	.globl gdbstub_return
+gdbstub_return:
+	RESTORE_ALL
+
+	.size	gdbstub_io_rx_handler,.-gdbstub_io_rx_handler
diff --git a/arch/mn10300/kernel/gdb-io-ttysm.c b/arch/mn10300/kernel/gdb-io-ttysm.c
new file mode 100644
index 000000000000..c5451592d403
--- /dev/null
+++ b/arch/mn10300/kernel/gdb-io-ttysm.c
@@ -0,0 +1,299 @@
+/* MN10300 On-chip serial driver for gdbstub I/O
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/gdb-stub.h>
+#include <asm/exceptions.h>
+#include <asm/unit/clock.h>
+#include "mn10300-serial.h"
+
+#if defined(CONFIG_GDBSTUB_ON_TTYSM0)
+struct mn10300_serial_port *const gdbstub_port = &mn10300_serial_port_sif0;
+#elif defined(CONFIG_GDBSTUB_ON_TTYSM1)
+struct mn10300_serial_port *const gdbstub_port = &mn10300_serial_port_sif1;
+#else
+struct mn10300_serial_port *const gdbstub_port = &mn10300_serial_port_sif2;
+#endif
+
+
+/*
+ * initialise the GDB stub I/O routines
+ */
+void __init gdbstub_io_init(void)
+{
+	uint16_t scxctr;
+	int tmp;
+
+	switch (gdbstub_port->clock_src) {
+	case MNSCx_CLOCK_SRC_IOCLK:
+		gdbstub_port->ioclk = MN10300_IOCLK;
+		break;
+
+#ifdef MN10300_IOBCLK
+	case MNSCx_CLOCK_SRC_IOBCLK:
+		gdbstub_port->ioclk = MN10300_IOBCLK;
+		break;
+#endif
+	default:
+		BUG();
+	}
+
+	/* set up the serial port */
+	gdbstub_io_set_baud(115200);
+
+	/* we want to get serial receive interrupts */
+	set_intr_level(gdbstub_port->rx_irq, GxICR_LEVEL_0);
+	set_intr_level(gdbstub_port->tx_irq, GxICR_LEVEL_0);
+	set_intr_stub(EXCEP_IRQ_LEVEL0, gdbstub_io_rx_handler);
+
+	*gdbstub_port->rx_icr |= GxICR_ENABLE;
+	tmp = *gdbstub_port->rx_icr;
+
+	/* enable the device */
+	scxctr = SC01CTR_CLN_8BIT;	/* 1N8 */
+	switch (gdbstub_port->div_timer) {
+	case MNSCx_DIV_TIMER_16BIT:
+		scxctr |= SC0CTR_CK_TM8UFLOW_8; /* == SC1CTR_CK_TM9UFLOW_8
+						   == SC2CTR_CK_TM10UFLOW_8 */
+		break;
+
+	case MNSCx_DIV_TIMER_8BIT:
+		scxctr |= SC0CTR_CK_TM2UFLOW_8;
+		break;
+	}
+
+	scxctr |= SC01CTR_TXE | SC01CTR_RXE;
+
+	*gdbstub_port->_control = scxctr;
+	tmp = *gdbstub_port->_control;
+
+	/* permit level 0 IRQs only */
+	asm volatile(
+		"	and %0,epsw	\n"
+		"	or %1,epsw	\n"
+		:
+		: "i"(~EPSW_IM), "i"(EPSW_IE|EPSW_IM_1)
+		);
+}
+
+/*
+ * set up the GDB stub serial port baud rate timers
+ */
+void gdbstub_io_set_baud(unsigned baud)
+{
+	const unsigned bits = 10; /* 1 [start] + 8 [data] + 0 [parity] +
+				   * 1 [stop] */
+	unsigned long ioclk = gdbstub_port->ioclk;
+	unsigned xdiv, tmp;
+	uint16_t tmxbr;
+	uint8_t tmxmd;
+
+	if (!baud) {
+		baud = 9600;
+	} else if (baud == 134) {
+		baud = 269;	/* 134 is really 134.5 */
+		xdiv = 2;
+	}
+
+try_alternative:
+	xdiv = 1;
+
+	switch (gdbstub_port->div_timer) {
+	case MNSCx_DIV_TIMER_16BIT:
+		tmxmd = TM8MD_SRC_IOCLK;
+		tmxbr = tmp = (ioclk / (baud * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 65535)
+			goto timer_okay;
+
+		tmxmd = TM8MD_SRC_IOCLK_8;
+		tmxbr = tmp = (ioclk / (baud * 8 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 65535)
+			goto timer_okay;
+
+		tmxmd = TM8MD_SRC_IOCLK_32;
+		tmxbr = tmp = (ioclk / (baud * 32 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 65535)
+			goto timer_okay;
+
+		break;
+
+	case MNSCx_DIV_TIMER_8BIT:
+		tmxmd = TM2MD_SRC_IOCLK;
+		tmxbr = tmp = (ioclk / (baud * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 255)
+			goto timer_okay;
+
+		tmxmd = TM2MD_SRC_IOCLK_8;
+		tmxbr = tmp = (ioclk / (baud * 8 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 255)
+			goto timer_okay;
+
+		tmxmd = TM2MD_SRC_IOCLK_32;
+		tmxbr = tmp = (ioclk / (baud * 32 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 255)
+			goto timer_okay;
+		break;
+	}
+
+	/* as a last resort, if the quotient is zero, default to 9600 bps */
+	baud = 9600;
+	goto try_alternative;
+
+timer_okay:
+	gdbstub_port->uart.timeout = (2 * bits * HZ) / baud;
+	gdbstub_port->uart.timeout += HZ / 50;
+
+	/* set the timer to produce the required baud rate */
+	switch (gdbstub_port->div_timer) {
+	case MNSCx_DIV_TIMER_16BIT:
+		*gdbstub_port->_tmxmd = 0;
+		*gdbstub_port->_tmxbr = tmxbr;
+		*gdbstub_port->_tmxmd = TM8MD_INIT_COUNTER;
+		*gdbstub_port->_tmxmd = tmxmd | TM8MD_COUNT_ENABLE;
+		break;
+
+	case MNSCx_DIV_TIMER_8BIT:
+		*gdbstub_port->_tmxmd = 0;
+		*(volatile u8 *) gdbstub_port->_tmxbr = (u8)tmxbr;
+		*gdbstub_port->_tmxmd = TM2MD_INIT_COUNTER;
+		*gdbstub_port->_tmxmd = tmxmd | TM2MD_COUNT_ENABLE;
+		break;
+	}
+}
+
+/*
+ * wait for a character to come from the debugger
+ */
+int gdbstub_io_rx_char(unsigned char *_ch, int nonblock)
+{
+	unsigned ix;
+	u8 ch, st;
+
+	*_ch = 0xff;
+
+	if (gdbstub_rx_unget) {
+		*_ch = gdbstub_rx_unget;
+		gdbstub_rx_unget = 0;
+		return 0;
+	}
+
+try_again:
+	/* pull chars out of the buffer */
+	ix = gdbstub_rx_outp;
+	if (ix == gdbstub_rx_inp) {
+		if (nonblock)
+			return -EAGAIN;
+#ifdef CONFIG_MN10300_WD_TIMER
+		watchdog_alert_counter = 0;
+#endif /* CONFIG_MN10300_WD_TIMER */
+		goto try_again;
+	}
+
+	ch = gdbstub_rx_buffer[ix++];
+	st = gdbstub_rx_buffer[ix++];
+	gdbstub_rx_outp = ix & (PAGE_SIZE - 1);
+
+	st &= SC01STR_RXF | SC01STR_RBF | SC01STR_FEF | SC01STR_PEF |
+		SC01STR_OEF;
+
+	/* deal with what we've got
+	 * - note that the UART doesn't do BREAK-detection for us
+	 */
+	if (st & SC01STR_FEF && ch == 0) {
+		switch (gdbstub_port->rx_brk) {
+		case 0:	gdbstub_port->rx_brk = 1;	goto try_again;
+		case 1:	gdbstub_port->rx_brk = 2;	goto try_again;
+		case 2:
+			gdbstub_port->rx_brk = 3;
+			gdbstub_proto("### GDB MNSERIAL Rx Break Detected"
+				      " ###\n");
+			return -EINTR;
+		default:
+			goto try_again;
+		}
+	} else if (st & SC01STR_FEF) {
+		if (gdbstub_port->rx_brk)
+			goto try_again;
+
+		gdbstub_proto("### GDB MNSERIAL Framing Error ###\n");
+		return -EIO;
+	} else if (st & SC01STR_OEF) {
+		if (gdbstub_port->rx_brk)
+			goto try_again;
+
+		gdbstub_proto("### GDB MNSERIAL Overrun Error ###\n");
+		return -EIO;
+	} else if (st & SC01STR_PEF) {
+		if (gdbstub_port->rx_brk)
+			goto try_again;
+
+		gdbstub_proto("### GDB MNSERIAL Parity Error ###\n");
+		return -EIO;
+	} else {
+		/* look for the tail-end char on a break run */
+		if (gdbstub_port->rx_brk == 3) {
+			switch (ch) {
+			case 0xFF:
+			case 0xFE:
+			case 0xFC:
+			case 0xF8:
+			case 0xF0:
+			case 0xE0:
+			case 0xC0:
+			case 0x80:
+			case 0x00:
+				gdbstub_port->rx_brk = 0;
+				goto try_again;
+			default:
+				break;
+			}
+		}
+
+		gdbstub_port->rx_brk = 0;
+		gdbstub_io("### GDB Rx %02x (st=%02x) ###\n", ch, st);
+		*_ch = ch & 0x7f;
+		return 0;
+	}
+}
+
+/*
+ * send a character to the debugger
+ */
+void gdbstub_io_tx_char(unsigned char ch)
+{
+	while (*gdbstub_port->_status & SC01STR_TBF)
+		continue;
+
+	if (ch == 0x0a) {
+		*(u8 *) gdbstub_port->_txb = 0x0d;
+		while (*gdbstub_port->_status & SC01STR_TBF)
+			continue;
+	}
+
+	*(u8 *) gdbstub_port->_txb = ch;
+}
+
+/*
+ * flush the transmission buffers
+ */
+void gdbstub_io_tx_flush(void)
+{
+	while (*gdbstub_port->_status & (SC01STR_TBF | SC01STR_TXF))
+		continue;
+}
diff --git a/arch/mn10300/kernel/gdb-low.S b/arch/mn10300/kernel/gdb-low.S
new file mode 100644
index 000000000000..e2725552cd82
--- /dev/null
+++ b/arch/mn10300/kernel/gdb-low.S
@@ -0,0 +1,115 @@
+###############################################################################
+#
+# MN10300 Low-level gdbstub routines
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/cpu-regs.h>
+#include <asm/exceptions.h>
+#include <asm/frame.inc>
+#include <asm/serial-regs.h>
+
+	.text
+
+###############################################################################
+#
+# GDB stub read memory with guard
+# - D0 holds the memory address to read
+# - D1 holds the address to store the byte into
+#
+###############################################################################
+	.globl gdbstub_read_byte_guard
+	.globl gdbstub_read_byte_cont
+ENTRY(gdbstub_read_byte)
+	mov	d0,a0
+	mov	d1,a1
+	clr	d0
+gdbstub_read_byte_guard:
+	movbu	(a0),d1
+gdbstub_read_byte_cont:
+	movbu	d1,(a1)
+	ret	[],0
+
+	.globl gdbstub_read_word_guard
+	.globl gdbstub_read_word_cont
+ENTRY(gdbstub_read_word)
+	mov	d0,a0
+	mov	d1,a1
+	clr	d0
+gdbstub_read_word_guard:
+	movhu	(a0),d1
+gdbstub_read_word_cont:
+	movhu	d1,(a1)
+	ret	[],0
+
+	.globl gdbstub_read_dword_guard
+	.globl gdbstub_read_dword_cont
+ENTRY(gdbstub_read_dword)
+	mov	d0,a0
+	mov	d1,a1
+	clr	d0
+gdbstub_read_dword_guard:
+	mov	(a0),d1
+gdbstub_read_dword_cont:
+	mov	d1,(a1)
+	ret	[],0
+
+###############################################################################
+#
+# GDB stub write memory with guard
+# - D0 holds the byte to store
+# - D1 holds the memory address to write
+#
+###############################################################################
+	.globl gdbstub_write_byte_guard
+	.globl gdbstub_write_byte_cont
+ENTRY(gdbstub_write_byte)
+	mov	d0,a0
+	mov	d1,a1
+	clr	d0
+gdbstub_write_byte_guard:
+	movbu	a0,(a1)
+gdbstub_write_byte_cont:
+	ret	[],0
+
+	.globl gdbstub_write_word_guard
+	.globl gdbstub_write_word_cont
+ENTRY(gdbstub_write_word)
+	mov	d0,a0
+	mov	d1,a1
+	clr	d0
+gdbstub_write_word_guard:
+	movhu	a0,(a1)
+gdbstub_write_word_cont:
+	ret	[],0
+
+	.globl gdbstub_write_dword_guard
+	.globl gdbstub_write_dword_cont
+ENTRY(gdbstub_write_dword)
+	mov	d0,a0
+	mov	d1,a1
+	clr	d0
+gdbstub_write_dword_guard:
+	mov	a0,(a1)
+gdbstub_write_dword_cont:
+	ret	[],0
+
+###############################################################################
+#
+# GDB stub BUG() trap
+#
+###############################################################################
+ENTRY(__gdbstub_bug_trap)
+	.byte	0xF7,0xF7	# don't use 0xFF as the JTAG unit preempts that
+	ret	[],0
diff --git a/arch/mn10300/kernel/gdb-stub.c b/arch/mn10300/kernel/gdb-stub.c
new file mode 100644
index 000000000000..21891c71d549
--- /dev/null
+++ b/arch/mn10300/kernel/gdb-stub.c
@@ -0,0 +1,1947 @@
+/* MN10300 GDB stub
+ *
+ * Originally written by Glenn Engel, Lake Stevens Instrument Division
+ *
+ * Contributed by HP Systems
+ *
+ * Modified for SPARC by Stu Grossman, Cygnus Support.
+ *
+ * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse
+ * Send complaints, suggestions etc. to <andy@waldorf-gmbh.de>
+ *
+ * Copyright (C) 1995 Andreas Busse
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified for Linux/mn10300 by David Howells <dhowells@redhat.com>
+ */
+
+/*
+ *  To enable debugger support, two things need to happen.  One, a
+ *  call to set_debug_traps() is necessary in order to allow any breakpoints
+ *  or error conditions to be properly intercepted and reported to gdb.
+ *  Two, a breakpoint needs to be generated to begin communication.  This
+ *  is most easily accomplished by a call to breakpoint().  Breakpoint()
+ *  simulates a breakpoint by executing a BREAK instruction.
+ *
+ *
+ *    The following gdb commands are supported:
+ *
+ * command          function                               Return value
+ *
+ *    g             return the value of the CPU registers  hex data or ENN
+ *    G             set the value of the CPU registers     OK or ENN
+ *
+ *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA      hex data or ENN
+ *    MAA..AA,LLLL: Write LLLL bytes at address AA.AA      OK or ENN
+ *
+ *    c             Resume at current address              SNN   ( signal NN)
+ *    cAA..AA       Continue at address AA..AA             SNN
+ *
+ *    s             Step one instruction                   SNN
+ *    sAA..AA       Step one instruction from AA..AA       SNN
+ *
+ *    k             kill
+ *
+ *    ?             What was the last sigval ?             SNN   (signal NN)
+ *
+ *    bBB..BB	    Set baud rate to BB..BB		   OK or BNN, then sets
+ *							   baud rate
+ *
+ * All commands and responses are sent with a packet which includes a
+ * checksum.  A packet consists of
+ *
+ * $<packet info>#<checksum>.
+ *
+ * where
+ * <packet info> :: <characters representing the command or response>
+ * <checksum>    :: < two hex digits computed as modulo 256 sum of <packetinfo>>
+ *
+ * When a packet is received, it is first acknowledged with either '+' or '-'.
+ * '+' indicates a successful transfer.  '-' indicates a failed transfer.
+ *
+ * Example:
+ *
+ * Host:                  Reply:
+ * $m0,10#2a               +$00010203040506070809101112131415#42
+ *
+ *
+ *  ==============
+ *  MORE EXAMPLES:
+ *  ==============
+ *
+ *  For reference -- the following are the steps that one
+ *  company took (RidgeRun Inc) to get remote gdb debugging
+ *  going. In this scenario the host machine was a PC and the
+ *  target platform was a Galileo EVB64120A MIPS evaluation
+ *  board.
+ *
+ *  Step 1:
+ *  First download gdb-5.0.tar.gz from the internet.
+ *  and then build/install the package.
+ *
+ *  Example:
+ *    $ tar zxf gdb-5.0.tar.gz
+ *    $ cd gdb-5.0
+ *    $ ./configure --target=am33_2.0-linux-gnu
+ *    $ make
+ *    $ install
+ *    am33_2.0-linux-gnu-gdb
+ *
+ *  Step 2:
+ *  Configure linux for remote debugging and build it.
+ *
+ *  Example:
+ *    $ cd ~/linux
+ *    $ make menuconfig <go to "Kernel Hacking" and turn on remote debugging>
+ *    $ make dep; make vmlinux
+ *
+ *  Step 3:
+ *  Download the kernel to the remote target and start
+ *  the kernel running. It will promptly halt and wait
+ *  for the host gdb session to connect. It does this
+ *  since the "Kernel Hacking" option has defined
+ *  CONFIG_REMOTE_DEBUG which in turn enables your calls
+ *  to:
+ *     set_debug_traps();
+ *     breakpoint();
+ *
+ *  Step 4:
+ *  Start the gdb session on the host.
+ *
+ *  Example:
+ *    $ am33_2.0-linux-gnu-gdb vmlinux
+ *    (gdb) set remotebaud 115200
+ *    (gdb) target remote /dev/ttyS1
+ *    ...at this point you are connected to
+ *       the remote target and can use gdb
+ *       in the normal fasion. Setting
+ *       breakpoints, single stepping,
+ *       printing variables, etc.
+ *
+ */
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <linux/bug.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/gdb-stub.h>
+#include <asm/exceptions.h>
+#include <asm/cacheflush.h>
+#include <asm/serial-regs.h>
+#include <asm/busctl-regs.h>
+#include <asm/unit/leds.h>
+#include <asm/unit/serial.h>
+
+/* define to use F7F7 rather than FF which is subverted by JTAG debugger */
+#undef GDBSTUB_USE_F7F7_AS_BREAKPOINT
+
+/*
+ * BUFMAX defines the maximum number of characters in inbound/outbound buffers
+ * at least NUMREGBYTES*2 are needed for register packets
+ */
+#define BUFMAX 2048
+
+static const char gdbstub_banner[] =
+	"Linux/MN10300 GDB Stub (c) RedHat 2007\n";
+
+u8	gdbstub_rx_buffer[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+u32	gdbstub_rx_inp;
+u32	gdbstub_rx_outp;
+u8	gdbstub_busy;
+u8	gdbstub_rx_overflow;
+u8	gdbstub_rx_unget;
+
+static u8	gdbstub_flush_caches;
+static char	input_buffer[BUFMAX];
+static char	output_buffer[BUFMAX];
+static char	trans_buffer[BUFMAX];
+
+static const char hexchars[] = "0123456789abcdef";
+
+struct gdbstub_bkpt {
+	u8	*addr;		/* address of breakpoint */
+	u8	len;		/* size of breakpoint */
+	u8	origbytes[7];	/* original bytes */
+};
+
+static struct gdbstub_bkpt gdbstub_bkpts[256];
+
+/*
+ * local prototypes
+ */
+static void getpacket(char *buffer);
+static int putpacket(char *buffer);
+static int computeSignal(enum exception_code excep);
+static int hex(unsigned char ch);
+static int hexToInt(char **ptr, int *intValue);
+static unsigned char *mem2hex(const void *mem, char *buf, int count,
+			      int may_fault);
+static const char *hex2mem(const char *buf, void *_mem, int count,
+			   int may_fault);
+
+/*
+ * Convert ch from a hex digit to an int
+ */
+static int hex(unsigned char ch)
+{
+	if (ch >= 'a' && ch <= 'f')
+		return ch - 'a' + 10;
+	if (ch >= '0' && ch <= '9')
+		return ch - '0';
+	if (ch >= 'A' && ch <= 'F')
+		return ch - 'A' + 10;
+	return -1;
+}
+
+#ifdef CONFIG_GDBSTUB_DEBUGGING
+
+void debug_to_serial(const char *p, int n)
+{
+	__debug_to_serial(p, n);
+	/* gdbstub_console_write(NULL, p, n); */
+}
+
+void gdbstub_printk(const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	/* Emit the output into the temporary buffer */
+	va_start(args, fmt);
+	len = vsnprintf(trans_buffer, sizeof(trans_buffer), fmt, args);
+	va_end(args);
+	debug_to_serial(trans_buffer, len);
+}
+
+#endif
+
+static inline char *gdbstub_strcpy(char *dst, const char *src)
+{
+	int loop = 0;
+	while ((dst[loop] = src[loop]))
+	       loop++;
+	return dst;
+}
+
+/*
+ * scan for the sequence $<data>#<checksum>
+ */
+static void getpacket(char *buffer)
+{
+	unsigned char checksum;
+	unsigned char xmitcsum;
+	unsigned char ch;
+	int count, i, ret, error;
+
+	for (;;) {
+		/*
+		 * wait around for the start character,
+		 * ignore all other characters
+		 */
+		do {
+			gdbstub_io_rx_char(&ch, 0);
+		} while (ch != '$');
+
+		checksum = 0;
+		xmitcsum = -1;
+		count = 0;
+		error = 0;
+
+		/*
+		 * now, read until a # or end of buffer is found
+		 */
+		while (count < BUFMAX) {
+			ret = gdbstub_io_rx_char(&ch, 0);
+			if (ret < 0)
+				error = ret;
+
+			if (ch == '#')
+				break;
+			checksum += ch;
+			buffer[count] = ch;
+			count++;
+		}
+
+		if (error == -EIO) {
+			gdbstub_proto("### GDB Rx Error - Skipping packet"
+				      " ###\n");
+			gdbstub_proto("### GDB Tx NAK\n");
+			gdbstub_io_tx_char('-');
+			continue;
+		}
+
+		if (count >= BUFMAX || error)
+			continue;
+
+		buffer[count] = 0;
+
+		/* read the checksum */
+		ret = gdbstub_io_rx_char(&ch, 0);
+		if (ret < 0)
+			error = ret;
+		xmitcsum = hex(ch) << 4;
+
+		ret = gdbstub_io_rx_char(&ch, 0);
+		if (ret < 0)
+			error = ret;
+		xmitcsum |= hex(ch);
+
+		if (error) {
+			if (error == -EIO)
+				gdbstub_io("### GDB Rx Error -"
+					   " Skipping packet\n");
+			gdbstub_io("### GDB Tx NAK\n");
+			gdbstub_io_tx_char('-');
+			continue;
+		}
+
+		/* check the checksum */
+		if (checksum != xmitcsum) {
+			gdbstub_io("### GDB Tx NAK\n");
+			gdbstub_io_tx_char('-');	/* failed checksum */
+			continue;
+		}
+
+		gdbstub_proto("### GDB Rx '$%s#%02x' ###\n", buffer, checksum);
+		gdbstub_io("### GDB Tx ACK\n");
+		gdbstub_io_tx_char('+'); /* successful transfer */
+
+		/*
+		 * if a sequence char is present,
+		 * reply the sequence ID
+		 */
+		if (buffer[2] == ':') {
+			gdbstub_io_tx_char(buffer[0]);
+			gdbstub_io_tx_char(buffer[1]);
+
+			/*
+			 * remove sequence chars from buffer
+			 */
+			count = 0;
+			while (buffer[count])
+				count++;
+			for (i = 3; i <= count; i++)
+				buffer[i - 3] = buffer[i];
+		}
+
+		break;
+	}
+}
+
+/*
+ * send the packet in buffer.
+ * - return 0 if successfully ACK'd
+ * - return 1 if abandoned due to new incoming packet
+ */
+static int putpacket(char *buffer)
+{
+	unsigned char checksum;
+	unsigned char ch;
+	int count;
+
+	/*
+	 * $<packet info>#<checksum>.
+	 */
+	gdbstub_proto("### GDB Tx $'%s'#?? ###\n", buffer);
+
+	do {
+		gdbstub_io_tx_char('$');
+		checksum = 0;
+		count = 0;
+
+		while ((ch = buffer[count]) != 0) {
+			gdbstub_io_tx_char(ch);
+			checksum += ch;
+			count += 1;
+		}
+
+		gdbstub_io_tx_char('#');
+		gdbstub_io_tx_char(hexchars[checksum >> 4]);
+		gdbstub_io_tx_char(hexchars[checksum & 0xf]);
+
+	} while (gdbstub_io_rx_char(&ch, 0),
+		 ch == '-' && (gdbstub_io("### GDB Rx NAK\n"), 0),
+		 ch != '-' && ch != '+' &&
+		 (gdbstub_io("### GDB Rx ??? %02x\n", ch), 0),
+		 ch != '+' && ch != '$');
+
+	if (ch == '+') {
+		gdbstub_io("### GDB Rx ACK\n");
+		return 0;
+	}
+
+	gdbstub_io("### GDB Tx Abandoned\n");
+	gdbstub_rx_unget = ch;
+	return 1;
+}
+
+/*
+ * While we find nice hex chars, build an int.
+ * Return number of chars processed.
+ */
+static int hexToInt(char **ptr, int *intValue)
+{
+	int numChars = 0;
+	int hexValue;
+
+	*intValue = 0;
+
+	while (**ptr) {
+		hexValue = hex(**ptr);
+		if (hexValue < 0)
+			break;
+
+		*intValue = (*intValue << 4) | hexValue;
+		numChars++;
+
+		(*ptr)++;
+	}
+
+	return (numChars);
+}
+
+/*
+ * We single-step by setting breakpoints. When an exception
+ * is handled, we need to restore the instructions hoisted
+ * when the breakpoints were set.
+ *
+ * This is where we save the original instructions.
+ */
+static struct gdb_bp_save {
+	u8	*addr;
+	u8	opcode[2];
+} step_bp[2];
+
+static const unsigned char gdbstub_insn_sizes[256] =
+{
+	/* 1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+	1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3,	/* 0 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 1 */
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, /* 2 */
+	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, /* 3 */
+	1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, /* 4 */
+	1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, /* 5 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 8 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 9 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* a */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* b */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 2, /* c */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* e */
+	0, 2, 2, 2, 2, 2, 2, 4, 0, 3, 0, 4, 0, 6, 7, 1  /* f */
+};
+
+static int __gdbstub_mark_bp(u8 *addr, int ix)
+{
+	if (addr < (u8 *) 0x70000000UL)
+		return 0;
+	/* 70000000-7fffffff: vmalloc area */
+	if (addr < (u8 *) 0x80000000UL)
+		goto okay;
+	if (addr < (u8 *) 0x8c000000UL)
+		return 0;
+	/* 8c000000-93ffffff: SRAM, SDRAM */
+	if (addr < (u8 *) 0x94000000UL)
+		goto okay;
+	return 0;
+
+okay:
+	if (gdbstub_read_byte(addr + 0, &step_bp[ix].opcode[0]) < 0 ||
+	    gdbstub_read_byte(addr + 1, &step_bp[ix].opcode[1]) < 0)
+		return 0;
+
+	step_bp[ix].addr = addr;
+	return 1;
+}
+
+static inline void __gdbstub_restore_bp(void)
+{
+#ifdef GDBSTUB_USE_F7F7_AS_BREAKPOINT
+	if (step_bp[0].addr) {
+		gdbstub_write_byte(step_bp[0].opcode[0], step_bp[0].addr + 0);
+		gdbstub_write_byte(step_bp[0].opcode[1], step_bp[0].addr + 1);
+	}
+	if (step_bp[1].addr) {
+		gdbstub_write_byte(step_bp[1].opcode[0], step_bp[1].addr + 0);
+		gdbstub_write_byte(step_bp[1].opcode[1], step_bp[1].addr + 1);
+	}
+#else
+	if (step_bp[0].addr)
+		gdbstub_write_byte(step_bp[0].opcode[0], step_bp[0].addr + 0);
+	if (step_bp[1].addr)
+		gdbstub_write_byte(step_bp[1].opcode[0], step_bp[1].addr + 0);
+#endif
+
+	gdbstub_flush_caches = 1;
+
+	step_bp[0].addr		= NULL;
+	step_bp[0].opcode[0]	= 0;
+	step_bp[0].opcode[1]	= 0;
+	step_bp[1].addr		= NULL;
+	step_bp[1].opcode[0]	= 0;
+	step_bp[1].opcode[1]	= 0;
+}
+
+/*
+ * emulate single stepping by means of breakpoint instructions
+ */
+static int gdbstub_single_step(struct pt_regs *regs)
+{
+	unsigned size;
+	uint32_t x;
+	uint8_t cur, *pc, *sp;
+
+	step_bp[0].addr		= NULL;
+	step_bp[0].opcode[0]	= 0;
+	step_bp[0].opcode[1]	= 0;
+	step_bp[1].addr		= NULL;
+	step_bp[1].opcode[0]	= 0;
+	step_bp[1].opcode[1]	= 0;
+	x = 0;
+
+	pc = (u8 *) regs->pc;
+	sp = (u8 *) (regs + 1);
+	if (gdbstub_read_byte(pc, &cur) < 0)
+		return -EFAULT;
+
+	gdbstub_bkpt("Single Step from %p { %02x }\n", pc, cur);
+
+	gdbstub_flush_caches = 1;
+
+	size = gdbstub_insn_sizes[cur];
+	if (size > 0) {
+		if (!__gdbstub_mark_bp(pc + size, 0))
+			goto fault;
+	} else {
+		switch (cur) {
+			/* Bxx (d8,PC) */
+		case 0xc0:
+		case 0xc1:
+		case 0xc2:
+		case 0xc3:
+		case 0xc4:
+		case 0xc5:
+		case 0xc6:
+		case 0xc7:
+		case 0xc8:
+		case 0xc9:
+		case 0xca:
+			if (gdbstub_read_byte(pc + 1, (u8 *) &x) < 0)
+				goto fault;
+			if (!__gdbstub_mark_bp(pc + 2, 0))
+				goto fault;
+			if ((x < 0 || x > 2) &&
+			    !__gdbstub_mark_bp(pc + (s8) x, 1))
+				goto fault;
+			break;
+
+			/* LXX (d8,PC) */
+		case 0xd0:
+		case 0xd1:
+		case 0xd2:
+		case 0xd3:
+		case 0xd4:
+		case 0xd5:
+		case 0xd6:
+		case 0xd7:
+		case 0xd8:
+		case 0xd9:
+		case 0xda:
+			if (!__gdbstub_mark_bp(pc + 1, 0))
+				goto fault;
+			if (regs->pc != regs->lar &&
+			    !__gdbstub_mark_bp((u8 *) regs->lar, 1))
+				goto fault;
+			break;
+
+			/* SETLB - loads the next for bytes into the LIR
+			 * register */
+		case 0xdb:
+			if (!__gdbstub_mark_bp(pc + 1, 0))
+				goto fault;
+			break;
+
+			/* JMP (d16,PC) or CALL (d16,PC) */
+		case 0xcc:
+		case 0xcd:
+			if (gdbstub_read_byte(pc + 1, ((u8 *) &x) + 0) < 0 ||
+			    gdbstub_read_byte(pc + 2, ((u8 *) &x) + 1) < 0)
+				goto fault;
+			if (!__gdbstub_mark_bp(pc + (s16) x, 0))
+				goto fault;
+			break;
+
+			/* JMP (d32,PC) or CALL (d32,PC) */
+		case 0xdc:
+		case 0xdd:
+			if (gdbstub_read_byte(pc + 1, ((u8 *) &x) + 0) < 0 ||
+			    gdbstub_read_byte(pc + 2, ((u8 *) &x) + 1) < 0 ||
+			    gdbstub_read_byte(pc + 3, ((u8 *) &x) + 2) < 0 ||
+			    gdbstub_read_byte(pc + 4, ((u8 *) &x) + 3) < 0)
+				goto fault;
+			if (!__gdbstub_mark_bp(pc + (s32) x, 0))
+				goto fault;
+			break;
+
+			/* RETF */
+		case 0xde:
+			if (!__gdbstub_mark_bp((u8 *) regs->mdr, 0))
+				goto fault;
+			break;
+
+			/* RET */
+		case 0xdf:
+			if (gdbstub_read_byte(pc + 2, (u8 *) &x) < 0)
+				goto fault;
+			sp += (s8)x;
+			if (gdbstub_read_byte(sp + 0, ((u8 *) &x) + 0) < 0 ||
+			    gdbstub_read_byte(sp + 1, ((u8 *) &x) + 1) < 0 ||
+			    gdbstub_read_byte(sp + 2, ((u8 *) &x) + 2) < 0 ||
+			    gdbstub_read_byte(sp + 3, ((u8 *) &x) + 3) < 0)
+				goto fault;
+			if (!__gdbstub_mark_bp((u8 *) x, 0))
+				goto fault;
+			break;
+
+		case 0xf0:
+			if (gdbstub_read_byte(pc + 1, &cur) < 0)
+				goto fault;
+
+			if (cur >= 0xf0 && cur <= 0xf7) {
+				/* JMP (An) / CALLS (An) */
+				switch (cur & 3) {
+				case 0: x = regs->a0; break;
+				case 1: x = regs->a1; break;
+				case 2: x = regs->a2; break;
+				case 3: x = regs->a3; break;
+				}
+				if (!__gdbstub_mark_bp((u8 *) x, 0))
+					goto fault;
+			} else if (cur == 0xfc) {
+				/* RETS */
+				if (gdbstub_read_byte(
+					    sp + 0, ((u8 *) &x) + 0) < 0 ||
+				    gdbstub_read_byte(
+					    sp + 1, ((u8 *) &x) + 1) < 0 ||
+				    gdbstub_read_byte(
+					    sp + 2, ((u8 *) &x) + 2) < 0 ||
+				    gdbstub_read_byte(
+					    sp + 3, ((u8 *) &x) + 3) < 0)
+					goto fault;
+				if (!__gdbstub_mark_bp((u8 *) x, 0))
+					goto fault;
+			} else if (cur == 0xfd) {
+				/* RTI */
+				if (gdbstub_read_byte(
+					    sp + 4, ((u8 *) &x) + 0) < 0 ||
+				    gdbstub_read_byte(
+					    sp + 5, ((u8 *) &x) + 1) < 0 ||
+				    gdbstub_read_byte(
+					    sp + 6, ((u8 *) &x) + 2) < 0 ||
+				    gdbstub_read_byte(
+					    sp + 7, ((u8 *) &x) + 3) < 0)
+					goto fault;
+				if (!__gdbstub_mark_bp((u8 *) x, 0))
+					goto fault;
+			} else {
+				if (!__gdbstub_mark_bp(pc + 2, 0))
+					goto fault;
+			}
+
+			break;
+
+			/* potential 3-byte conditional branches */
+		case 0xf8:
+			if (gdbstub_read_byte(pc + 1, &cur) < 0)
+				goto fault;
+			if (!__gdbstub_mark_bp(pc + 3, 0))
+				goto fault;
+
+			if (cur >= 0xe8 && cur <= 0xeb) {
+				if (gdbstub_read_byte(
+					    pc + 2, ((u8 *) &x) + 0) < 0)
+					goto fault;
+				if ((x < 0 || x > 3) &&
+				    !__gdbstub_mark_bp(pc + (s8) x, 1))
+					goto fault;
+			}
+			break;
+
+		case 0xfa:
+			if (gdbstub_read_byte(pc + 1, &cur) < 0)
+				goto fault;
+
+			if (cur == 0xff) {
+				/* CALLS (d16,PC) */
+				if (gdbstub_read_byte(
+					    pc + 2, ((u8 *) &x) + 0) < 0 ||
+				    gdbstub_read_byte(
+					    pc + 3, ((u8 *) &x) + 1) < 0)
+					goto fault;
+				if (!__gdbstub_mark_bp(pc + (s16) x, 0))
+					goto fault;
+			} else {
+				if (!__gdbstub_mark_bp(pc + 4, 0))
+					goto fault;
+			}
+			break;
+
+		case 0xfc:
+			if (gdbstub_read_byte(pc + 1, &cur) < 0)
+				goto fault;
+			if (cur == 0xff) {
+				/* CALLS (d32,PC) */
+				if (gdbstub_read_byte(
+					    pc + 2, ((u8 *) &x) + 0) < 0 ||
+				    gdbstub_read_byte(
+					    pc + 3, ((u8 *) &x) + 1) < 0 ||
+				    gdbstub_read_byte(
+					    pc + 4, ((u8 *) &x) + 2) < 0 ||
+				    gdbstub_read_byte(
+					    pc + 5, ((u8 *) &x) + 3) < 0)
+					goto fault;
+				if (!__gdbstub_mark_bp(
+					    pc + (s32) x, 0))
+					goto fault;
+			} else {
+				if (!__gdbstub_mark_bp(
+					    pc + 6, 0))
+					goto fault;
+			}
+			break;
+
+		}
+	}
+
+	gdbstub_bkpt("Step: %02x at %p; %02x at %p\n",
+		     step_bp[0].opcode[0], step_bp[0].addr,
+		     step_bp[1].opcode[0], step_bp[1].addr);
+
+	if (step_bp[0].addr) {
+#ifdef GDBSTUB_USE_F7F7_AS_BREAKPOINT
+		if (gdbstub_write_byte(0xF7, step_bp[0].addr + 0) < 0 ||
+		    gdbstub_write_byte(0xF7, step_bp[0].addr + 1) < 0)
+			goto fault;
+#else
+		if (gdbstub_write_byte(0xFF, step_bp[0].addr + 0) < 0)
+			goto fault;
+#endif
+	}
+
+	if (step_bp[1].addr) {
+#ifdef GDBSTUB_USE_F7F7_AS_BREAKPOINT
+		if (gdbstub_write_byte(0xF7, step_bp[1].addr + 0) < 0 ||
+		    gdbstub_write_byte(0xF7, step_bp[1].addr + 1) < 0)
+			goto fault;
+#else
+		if (gdbstub_write_byte(0xFF, step_bp[1].addr + 0) < 0)
+			goto fault;
+#endif
+	}
+
+	return 0;
+
+ fault:
+	/* uh-oh - silly address alert, try and restore things */
+	__gdbstub_restore_bp();
+	return -EFAULT;
+}
+
+#ifdef CONFIG_GDBSTUB_CONSOLE
+
+void gdbstub_console_write(struct console *con, const char *p, unsigned n)
+{
+	static const char gdbstub_cr[] = { 0x0d };
+	char outbuf[26];
+	int qty;
+	u8 busy;
+
+	busy = gdbstub_busy;
+	gdbstub_busy = 1;
+
+	outbuf[0] = 'O';
+
+	while (n > 0) {
+		qty = 1;
+
+		while (n > 0 && qty < 20) {
+			mem2hex(p, outbuf + qty, 2, 0);
+			qty += 2;
+			if (*p == 0x0a) {
+				mem2hex(gdbstub_cr, outbuf + qty, 2, 0);
+				qty += 2;
+			}
+			p++;
+			n--;
+		}
+
+		outbuf[qty] = 0;
+		putpacket(outbuf);
+	}
+
+	gdbstub_busy = busy;
+}
+
+static kdev_t gdbstub_console_dev(struct console *con)
+{
+	return MKDEV(1, 3); /* /dev/null */
+}
+
+static struct console gdbstub_console = {
+	.name	= "gdb",
+	.write	= gdbstub_console_write,
+	.device	= gdbstub_console_dev,
+	.flags	= CON_PRINTBUFFER,
+	.index	= -1,
+};
+
+#endif
+
+/*
+ * Convert the memory pointed to by mem into hex, placing result in buf.
+ * - if successful, return a pointer to the last char put in buf (NUL)
+ * - in case of mem fault, return NULL
+ * may_fault is non-zero if we are reading from arbitrary memory, but is
+ * currently not used.
+ */
+static
+unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault)
+{
+	const u8 *mem = _mem;
+	u8 ch[4];
+
+	if ((u32) mem & 1 && count >= 1) {
+		if (gdbstub_read_byte(mem, ch) != 0)
+			return 0;
+		*buf++ = hexchars[ch[0] >> 4];
+		*buf++ = hexchars[ch[0] & 0xf];
+		mem++;
+		count--;
+	}
+
+	if ((u32) mem & 3 && count >= 2) {
+		if (gdbstub_read_word(mem, ch) != 0)
+			return 0;
+		*buf++ = hexchars[ch[0] >> 4];
+		*buf++ = hexchars[ch[0] & 0xf];
+		*buf++ = hexchars[ch[1] >> 4];
+		*buf++ = hexchars[ch[1] & 0xf];
+		mem += 2;
+		count -= 2;
+	}
+
+	while (count >= 4) {
+		if (gdbstub_read_dword(mem, ch) != 0)
+			return 0;
+		*buf++ = hexchars[ch[0] >> 4];
+		*buf++ = hexchars[ch[0] & 0xf];
+		*buf++ = hexchars[ch[1] >> 4];
+		*buf++ = hexchars[ch[1] & 0xf];
+		*buf++ = hexchars[ch[2] >> 4];
+		*buf++ = hexchars[ch[2] & 0xf];
+		*buf++ = hexchars[ch[3] >> 4];
+		*buf++ = hexchars[ch[3] & 0xf];
+		mem += 4;
+		count -= 4;
+	}
+
+	if (count >= 2) {
+		if (gdbstub_read_word(mem, ch) != 0)
+			return 0;
+		*buf++ = hexchars[ch[0] >> 4];
+		*buf++ = hexchars[ch[0] & 0xf];
+		*buf++ = hexchars[ch[1] >> 4];
+		*buf++ = hexchars[ch[1] & 0xf];
+		mem += 2;
+		count -= 2;
+	}
+
+	if (count >= 1) {
+		if (gdbstub_read_byte(mem, ch) != 0)
+			return 0;
+		*buf++ = hexchars[ch[0] >> 4];
+		*buf++ = hexchars[ch[0] & 0xf];
+	}
+
+	*buf = 0;
+	return buf;
+}
+
+/*
+ * convert the hex array pointed to by buf into binary to be placed in mem
+ * return a pointer to the character AFTER the last byte written
+ * may_fault is non-zero if we are reading from arbitrary memory, but is
+ * currently not used.
+ */
+static
+const char *hex2mem(const char *buf, void *_mem, int count, int may_fault)
+{
+	u8 *mem = _mem;
+	union {
+		u32 val;
+		u8 b[4];
+	} ch;
+
+	if ((u32) mem & 1 && count >= 1) {
+		ch.b[0]  = hex(*buf++) << 4;
+		ch.b[0] |= hex(*buf++);
+		if (gdbstub_write_byte(ch.val, mem) != 0)
+			return 0;
+		mem++;
+		count--;
+	}
+
+	if ((u32) mem & 3 && count >= 2) {
+		ch.b[0]  = hex(*buf++) << 4;
+		ch.b[0] |= hex(*buf++);
+		ch.b[1]  = hex(*buf++) << 4;
+		ch.b[1] |= hex(*buf++);
+		if (gdbstub_write_word(ch.val, mem) != 0)
+			return 0;
+		mem += 2;
+		count -= 2;
+	}
+
+	while (count >= 4) {
+		ch.b[0]  = hex(*buf++) << 4;
+		ch.b[0] |= hex(*buf++);
+		ch.b[1]  = hex(*buf++) << 4;
+		ch.b[1] |= hex(*buf++);
+		ch.b[2]  = hex(*buf++) << 4;
+		ch.b[2] |= hex(*buf++);
+		ch.b[3]  = hex(*buf++) << 4;
+		ch.b[3] |= hex(*buf++);
+		if (gdbstub_write_dword(ch.val, mem) != 0)
+			return 0;
+		mem += 4;
+		count -= 4;
+	}
+
+	if (count >= 2) {
+		ch.b[0]  = hex(*buf++) << 4;
+		ch.b[0] |= hex(*buf++);
+		ch.b[1]  = hex(*buf++) << 4;
+		ch.b[1] |= hex(*buf++);
+		if (gdbstub_write_word(ch.val, mem) != 0)
+			return 0;
+		mem += 2;
+		count -= 2;
+	}
+
+	if (count >= 1) {
+		ch.b[0]  = hex(*buf++) << 4;
+		ch.b[0] |= hex(*buf++);
+		if (gdbstub_write_byte(ch.val, mem) != 0)
+			return 0;
+	}
+
+	return buf;
+}
+
+/*
+ * This table contains the mapping between MN10300 exception codes, and
+ * signals, which are primarily what GDB understands.  It also indicates
+ * which hardware traps we need to commandeer when initializing the stub.
+ */
+static const struct excep_to_sig_map {
+	enum exception_code	excep;	/* MN10300 exception code */
+	unsigned char		signo;	/* Signal that we map this into */
+} excep_to_sig_map[] = {
+	{ EXCEP_ITLBMISS,	SIGSEGV		},
+	{ EXCEP_DTLBMISS,	SIGSEGV		},
+	{ EXCEP_TRAP,		SIGTRAP		},
+	{ EXCEP_ISTEP,		SIGTRAP		},
+	{ EXCEP_IBREAK,		SIGTRAP		},
+	{ EXCEP_OBREAK,		SIGTRAP		},
+	{ EXCEP_UNIMPINS,	SIGILL		},
+	{ EXCEP_UNIMPEXINS,	SIGILL		},
+	{ EXCEP_MEMERR,		SIGSEGV		},
+	{ EXCEP_MISALIGN,	SIGSEGV		},
+	{ EXCEP_BUSERROR,	SIGBUS		},
+	{ EXCEP_ILLINSACC,	SIGSEGV		},
+	{ EXCEP_ILLDATACC,	SIGSEGV		},
+	{ EXCEP_IOINSACC,	SIGSEGV		},
+	{ EXCEP_PRIVINSACC,	SIGSEGV		},
+	{ EXCEP_PRIVDATACC,	SIGSEGV		},
+	{ EXCEP_FPU_DISABLED,	SIGFPE		},
+	{ EXCEP_FPU_UNIMPINS,	SIGFPE		},
+	{ EXCEP_FPU_OPERATION,	SIGFPE		},
+	{ EXCEP_WDT,		SIGALRM		},
+	{ EXCEP_NMI,		SIGQUIT		},
+	{ EXCEP_IRQ_LEVEL0,	SIGINT		},
+	{ EXCEP_IRQ_LEVEL1,	SIGINT		},
+	{ EXCEP_IRQ_LEVEL2,	SIGINT		},
+	{ EXCEP_IRQ_LEVEL3,	SIGINT		},
+	{ EXCEP_IRQ_LEVEL4,	SIGINT		},
+	{ EXCEP_IRQ_LEVEL5,	SIGINT		},
+	{ EXCEP_IRQ_LEVEL6,	SIGINT		},
+	{ 0, 0}
+};
+
+/*
+ * convert the MN10300 exception code into a UNIX signal number
+ */
+static int computeSignal(enum exception_code excep)
+{
+	const struct excep_to_sig_map *map;
+
+	for (map = excep_to_sig_map; map->signo; map++)
+		if (map->excep == excep)
+			return map->signo;
+
+	return SIGHUP; /* default for things we don't know about */
+}
+
+static u32 gdbstub_fpcr, gdbstub_fpufs_array[32];
+
+/*
+ *
+ */
+static void gdbstub_store_fpu(void)
+{
+#ifdef CONFIG_FPU
+
+	asm volatile(
+		"or %2,epsw\n"
+#ifdef CONFIG_MN10300_PROC_MN103E010
+		"nop\n"
+		"nop\n"
+#endif
+		"mov %1, a1\n"
+		"fmov fs0,  (a1+)\n"
+		"fmov fs1,  (a1+)\n"
+		"fmov fs2,  (a1+)\n"
+		"fmov fs3,  (a1+)\n"
+		"fmov fs4,  (a1+)\n"
+		"fmov fs5,  (a1+)\n"
+		"fmov fs6,  (a1+)\n"
+		"fmov fs7,  (a1+)\n"
+		"fmov fs8,  (a1+)\n"
+		"fmov fs9,  (a1+)\n"
+		"fmov fs10, (a1+)\n"
+		"fmov fs11, (a1+)\n"
+		"fmov fs12, (a1+)\n"
+		"fmov fs13, (a1+)\n"
+		"fmov fs14, (a1+)\n"
+		"fmov fs15, (a1+)\n"
+		"fmov fs16, (a1+)\n"
+		"fmov fs17, (a1+)\n"
+		"fmov fs18, (a1+)\n"
+		"fmov fs19, (a1+)\n"
+		"fmov fs20, (a1+)\n"
+		"fmov fs21, (a1+)\n"
+		"fmov fs22, (a1+)\n"
+		"fmov fs23, (a1+)\n"
+		"fmov fs24, (a1+)\n"
+		"fmov fs25, (a1+)\n"
+		"fmov fs26, (a1+)\n"
+		"fmov fs27, (a1+)\n"
+		"fmov fs28, (a1+)\n"
+		"fmov fs29, (a1+)\n"
+		"fmov fs30, (a1+)\n"
+		"fmov fs31, (a1+)\n"
+		"fmov fpcr, %0\n"
+		: "=d"(gdbstub_fpcr)
+		: "g" (&gdbstub_fpufs_array), "i"(EPSW_FE)
+		: "a1"
+		);
+#endif
+}
+
+/*
+ *
+ */
+static void gdbstub_load_fpu(void)
+{
+#ifdef CONFIG_FPU
+
+	asm volatile(
+		"or %1,epsw\n"
+#ifdef CONFIG_MN10300_PROC_MN103E010
+		"nop\n"
+		"nop\n"
+#endif
+		"mov %0, a1\n"
+		"fmov (a1+), fs0\n"
+		"fmov (a1+), fs1\n"
+		"fmov (a1+), fs2\n"
+		"fmov (a1+), fs3\n"
+		"fmov (a1+), fs4\n"
+		"fmov (a1+), fs5\n"
+		"fmov (a1+), fs6\n"
+		"fmov (a1+), fs7\n"
+		"fmov (a1+), fs8\n"
+		"fmov (a1+), fs9\n"
+		"fmov (a1+), fs10\n"
+		"fmov (a1+), fs11\n"
+		"fmov (a1+), fs12\n"
+		"fmov (a1+), fs13\n"
+		"fmov (a1+), fs14\n"
+		"fmov (a1+), fs15\n"
+		"fmov (a1+), fs16\n"
+		"fmov (a1+), fs17\n"
+		"fmov (a1+), fs18\n"
+		"fmov (a1+), fs19\n"
+		"fmov (a1+), fs20\n"
+		"fmov (a1+), fs21\n"
+		"fmov (a1+), fs22\n"
+		"fmov (a1+), fs23\n"
+		"fmov (a1+), fs24\n"
+		"fmov (a1+), fs25\n"
+		"fmov (a1+), fs26\n"
+		"fmov (a1+), fs27\n"
+		"fmov (a1+), fs28\n"
+		"fmov (a1+), fs29\n"
+		"fmov (a1+), fs30\n"
+		"fmov (a1+), fs31\n"
+		"fmov %2, fpcr\n"
+		:
+		: "g" (&gdbstub_fpufs_array), "i"(EPSW_FE), "d"(gdbstub_fpcr)
+		: "a1"
+	);
+#endif
+}
+
+/*
+ * set a software breakpoint
+ */
+int gdbstub_set_breakpoint(u8 *addr, int len)
+{
+	int bkpt, loop, xloop;
+
+#ifdef GDBSTUB_USE_F7F7_AS_BREAKPOINT
+	len = (len + 1) & ~1;
+#endif
+
+	gdbstub_bkpt("setbkpt(%p,%d)\n", addr, len);
+
+	for (bkpt = 255; bkpt >= 0; bkpt--)
+		if (!gdbstub_bkpts[bkpt].addr)
+			break;
+	if (bkpt < 0)
+		return -ENOSPC;
+
+	for (loop = 0; loop < len; loop++)
+		if (gdbstub_read_byte(&addr[loop],
+				      &gdbstub_bkpts[bkpt].origbytes[loop]
+				      ) < 0)
+			return -EFAULT;
+
+	gdbstub_flush_caches = 1;
+
+#ifdef GDBSTUB_USE_F7F7_AS_BREAKPOINT
+	for (loop = 0; loop < len; loop++)
+		if (gdbstub_write_byte(0xF7, &addr[loop]) < 0)
+			goto restore;
+#else
+	for (loop = 0; loop < len; loop++)
+		if (gdbstub_write_byte(0xFF, &addr[loop]) < 0)
+			goto restore;
+#endif
+
+	gdbstub_bkpts[bkpt].addr = addr;
+	gdbstub_bkpts[bkpt].len = len;
+
+	gdbstub_bkpt("Set BKPT[%02x]: %p-%p {%02x%02x%02x%02x%02x%02x%02x}\n",
+		     bkpt,
+		     gdbstub_bkpts[bkpt].addr,
+		     gdbstub_bkpts[bkpt].addr + gdbstub_bkpts[bkpt].len - 1,
+		     gdbstub_bkpts[bkpt].origbytes[0],
+		     gdbstub_bkpts[bkpt].origbytes[1],
+		     gdbstub_bkpts[bkpt].origbytes[2],
+		     gdbstub_bkpts[bkpt].origbytes[3],
+		     gdbstub_bkpts[bkpt].origbytes[4],
+		     gdbstub_bkpts[bkpt].origbytes[5],
+		     gdbstub_bkpts[bkpt].origbytes[6]
+		     );
+
+	return 0;
+
+restore:
+	for (xloop = 0; xloop < loop; xloop++)
+		gdbstub_write_byte(gdbstub_bkpts[bkpt].origbytes[xloop],
+				   addr + xloop);
+	return -EFAULT;
+}
+
+/*
+ * clear a software breakpoint
+ */
+int gdbstub_clear_breakpoint(u8 *addr, int len)
+{
+	int bkpt, loop;
+
+#ifdef GDBSTUB_USE_F7F7_AS_BREAKPOINT
+	len = (len + 1) & ~1;
+#endif
+
+	gdbstub_bkpt("clearbkpt(%p,%d)\n", addr, len);
+
+	for (bkpt = 255; bkpt >= 0; bkpt--)
+		if (gdbstub_bkpts[bkpt].addr == addr &&
+		    gdbstub_bkpts[bkpt].len == len)
+			break;
+	if (bkpt < 0)
+		return -ENOENT;
+
+	gdbstub_bkpts[bkpt].addr = NULL;
+
+	gdbstub_flush_caches = 1;
+
+	for (loop = 0; loop < len; loop++)
+		if (gdbstub_write_byte(gdbstub_bkpts[bkpt].origbytes[loop],
+				       addr + loop) < 0)
+			return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * This function does all command processing for interfacing to gdb
+ * - returns 1 if the exception should be skipped, 0 otherwise.
+ */
+static int gdbstub(struct pt_regs *regs, enum exception_code excep)
+{
+	unsigned long *stack;
+	unsigned long epsw, mdr;
+	uint32_t zero, ssp;
+	uint8_t broke;
+	char *ptr;
+	int sigval;
+	int addr;
+	int length;
+	int loop;
+
+	if (excep == EXCEP_FPU_DISABLED)
+		return 0;
+
+	gdbstub_flush_caches = 0;
+
+	mn10300_set_gdbleds(1);
+
+	asm volatile("mov mdr,%0" : "=d"(mdr));
+	asm volatile("mov epsw,%0" : "=d"(epsw));
+	asm volatile("mov %0,epsw"
+		     :: "d"((epsw & ~EPSW_IM) | EPSW_IE | EPSW_IM_1));
+
+	gdbstub_store_fpu();
+
+#ifdef CONFIG_GDBSTUB_IMMEDIATE
+	/* skip the initial pause loop */
+	if (regs->pc == (unsigned long) __gdbstub_pause)
+		regs->pc = (unsigned long) start_kernel;
+#endif
+
+	/* if we were single stepping, restore the opcodes hoisted for the
+	 * breakpoint[s] */
+	broke = 0;
+	if ((step_bp[0].addr && step_bp[0].addr == (u8 *) regs->pc) ||
+	    (step_bp[1].addr && step_bp[1].addr == (u8 *) regs->pc))
+		broke = 1;
+
+	__gdbstub_restore_bp();
+
+	if (gdbstub_rx_unget) {
+		sigval = SIGINT;
+		if (gdbstub_rx_unget != 3)
+			goto packet_waiting;
+		gdbstub_rx_unget = 0;
+	}
+
+	stack = (unsigned long *) regs->sp;
+	sigval = broke ? SIGTRAP : computeSignal(excep);
+
+	/* send information about a BUG() */
+	if (!user_mode(regs) && excep == EXCEP_SYSCALL15) {
+		const struct bug_entry *bug;
+
+		bug = find_bug(regs->pc);
+		if (bug)
+			goto found_bug;
+		length = snprintf(trans_buffer, sizeof(trans_buffer),
+				  "BUG() at address %lx\n", regs->pc);
+		goto send_bug_pkt;
+
+	found_bug:
+		length = snprintf(trans_buffer, sizeof(trans_buffer),
+				  "BUG() at address %lx (%s:%d)\n",
+				  regs->pc, bug->file, bug->line);
+
+	send_bug_pkt:
+		ptr = output_buffer;
+		*ptr++ = 'O';
+		ptr = mem2hex(trans_buffer, ptr, length, 0);
+		*ptr = 0;
+		putpacket(output_buffer);
+
+		regs->pc -= 2;
+		sigval = SIGABRT;
+	} else if (regs->pc == (unsigned long) __gdbstub_bug_trap) {
+		regs->pc = regs->mdr;
+		sigval = SIGABRT;
+	}
+
+	/*
+	 * send a message to the debugger's user saying what happened if it may
+	 * not be clear cut (we can't map exceptions onto signals properly)
+	 */
+	if (sigval != SIGINT && sigval != SIGTRAP && sigval != SIGILL) {
+		static const char title[] = "Excep ", tbcberr[] = "BCBERR ";
+		static const char crlf[] = "\r\n";
+		char hx;
+		u32 bcberr = BCBERR;
+
+		ptr = output_buffer;
+		*ptr++ = 'O';
+		ptr = mem2hex(title, ptr, sizeof(title) - 1, 0);
+
+		hx = hexchars[(excep & 0xf000) >> 12];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(excep & 0x0f00) >> 8];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(excep & 0x00f0) >> 4];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(excep & 0x000f)];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+
+		ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0);
+		*ptr = 0;
+		putpacket(output_buffer);	/* send it off... */
+
+		/* BCBERR */
+		ptr = output_buffer;
+		*ptr++ = 'O';
+		ptr = mem2hex(tbcberr, ptr, sizeof(tbcberr) - 1, 0);
+
+		hx = hexchars[(bcberr & 0xf0000000) >> 28];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(bcberr & 0x0f000000) >> 24];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(bcberr & 0x00f00000) >> 20];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(bcberr & 0x000f0000) >> 16];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(bcberr & 0x0000f000) >> 12];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(bcberr & 0x00000f00) >> 8];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(bcberr & 0x000000f0) >> 4];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+		hx = hexchars[(bcberr & 0x0000000f)];
+		*ptr++ = hexchars[hx >> 4];	*ptr++ = hexchars[hx & 0xf];
+
+		ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0);
+		*ptr = 0;
+		putpacket(output_buffer);	/* send it off... */
+	}
+
+	/*
+	 * tell the debugger that an exception has occurred
+	 */
+	ptr = output_buffer;
+
+	/*
+	 * Send trap type (converted to signal)
+	 */
+	*ptr++ = 'T';
+	*ptr++ = hexchars[sigval >> 4];
+	*ptr++ = hexchars[sigval & 0xf];
+
+	/*
+	 * Send Error PC
+	 */
+	*ptr++ = hexchars[GDB_REGID_PC >> 4];
+	*ptr++ = hexchars[GDB_REGID_PC & 0xf];
+	*ptr++ = ':';
+	ptr = mem2hex(&regs->pc, ptr, 4, 0);
+	*ptr++ = ';';
+
+	/*
+	 * Send frame pointer
+	 */
+	*ptr++ = hexchars[GDB_REGID_FP >> 4];
+	*ptr++ = hexchars[GDB_REGID_FP & 0xf];
+	*ptr++ = ':';
+	ptr = mem2hex(&regs->a3, ptr, 4, 0);
+	*ptr++ = ';';
+
+	/*
+	 * Send stack pointer
+	 */
+	ssp = (unsigned long) (regs + 1);
+	*ptr++ = hexchars[GDB_REGID_SP >> 4];
+	*ptr++ = hexchars[GDB_REGID_SP & 0xf];
+	*ptr++ = ':';
+	ptr = mem2hex(&ssp, ptr, 4, 0);
+	*ptr++ = ';';
+
+	*ptr++ = 0;
+	putpacket(output_buffer);	/* send it off... */
+
+packet_waiting:
+	/*
+	 * Wait for input from remote GDB
+	 */
+	while (1) {
+		output_buffer[0] = 0;
+		getpacket(input_buffer);
+
+		switch (input_buffer[0]) {
+			/* request repeat of last signal number */
+		case '?':
+			output_buffer[0] = 'S';
+			output_buffer[1] = hexchars[sigval >> 4];
+			output_buffer[2] = hexchars[sigval & 0xf];
+			output_buffer[3] = 0;
+			break;
+
+		case 'd':
+			/* toggle debug flag */
+			break;
+
+			/*
+			 * Return the value of the CPU registers
+			 */
+		case 'g':
+			zero = 0;
+			ssp = (u32) (regs + 1);
+			ptr = output_buffer;
+			ptr = mem2hex(&regs->d0, ptr, 4, 0);
+			ptr = mem2hex(&regs->d1, ptr, 4, 0);
+			ptr = mem2hex(&regs->d2, ptr, 4, 0);
+			ptr = mem2hex(&regs->d3, ptr, 4, 0);
+			ptr = mem2hex(&regs->a0, ptr, 4, 0);
+			ptr = mem2hex(&regs->a1, ptr, 4, 0);
+			ptr = mem2hex(&regs->a2, ptr, 4, 0);
+			ptr = mem2hex(&regs->a3, ptr, 4, 0);
+
+			ptr = mem2hex(&ssp, ptr, 4, 0);		/* 8 */
+			ptr = mem2hex(&regs->pc, ptr, 4, 0);
+			ptr = mem2hex(&regs->mdr, ptr, 4, 0);
+			ptr = mem2hex(&regs->epsw, ptr, 4, 0);
+			ptr = mem2hex(&regs->lir, ptr, 4, 0);
+			ptr = mem2hex(&regs->lar, ptr, 4, 0);
+			ptr = mem2hex(&regs->mdrq, ptr, 4, 0);
+
+			ptr = mem2hex(&regs->e0, ptr, 4, 0);	/* 15 */
+			ptr = mem2hex(&regs->e1, ptr, 4, 0);
+			ptr = mem2hex(&regs->e2, ptr, 4, 0);
+			ptr = mem2hex(&regs->e3, ptr, 4, 0);
+			ptr = mem2hex(&regs->e4, ptr, 4, 0);
+			ptr = mem2hex(&regs->e5, ptr, 4, 0);
+			ptr = mem2hex(&regs->e6, ptr, 4, 0);
+			ptr = mem2hex(&regs->e7, ptr, 4, 0);
+
+			ptr = mem2hex(&ssp, ptr, 4, 0);
+			ptr = mem2hex(&regs, ptr, 4, 0);
+			ptr = mem2hex(&regs->sp, ptr, 4, 0);
+			ptr = mem2hex(&regs->mcrh, ptr, 4, 0);	/* 26 */
+			ptr = mem2hex(&regs->mcrl, ptr, 4, 0);
+			ptr = mem2hex(&regs->mcvf, ptr, 4, 0);
+
+			ptr = mem2hex(&gdbstub_fpcr, ptr, 4, 0); /* 29 - FPCR */
+			ptr = mem2hex(&zero, ptr, 4, 0);
+			ptr = mem2hex(&zero, ptr, 4, 0);
+			for (loop = 0; loop < 32; loop++)
+				ptr = mem2hex(&gdbstub_fpufs_array[loop],
+					      ptr, 4, 0); /* 32 - FS0-31 */
+
+			break;
+
+			/*
+			 * set the value of the CPU registers - return OK
+			 */
+		case 'G':
+		{
+			const char *ptr;
+
+			ptr = &input_buffer[1];
+			ptr = hex2mem(ptr, &regs->d0, 4, 0);
+			ptr = hex2mem(ptr, &regs->d1, 4, 0);
+			ptr = hex2mem(ptr, &regs->d2, 4, 0);
+			ptr = hex2mem(ptr, &regs->d3, 4, 0);
+			ptr = hex2mem(ptr, &regs->a0, 4, 0);
+			ptr = hex2mem(ptr, &regs->a1, 4, 0);
+			ptr = hex2mem(ptr, &regs->a2, 4, 0);
+			ptr = hex2mem(ptr, &regs->a3, 4, 0);
+
+			ptr = hex2mem(ptr, &ssp, 4, 0);		/* 8 */
+			ptr = hex2mem(ptr, &regs->pc, 4, 0);
+			ptr = hex2mem(ptr, &regs->mdr, 4, 0);
+			ptr = hex2mem(ptr, &regs->epsw, 4, 0);
+			ptr = hex2mem(ptr, &regs->lir, 4, 0);
+			ptr = hex2mem(ptr, &regs->lar, 4, 0);
+			ptr = hex2mem(ptr, &regs->mdrq, 4, 0);
+
+			ptr = hex2mem(ptr, &regs->e0, 4, 0);	/* 15 */
+			ptr = hex2mem(ptr, &regs->e1, 4, 0);
+			ptr = hex2mem(ptr, &regs->e2, 4, 0);
+			ptr = hex2mem(ptr, &regs->e3, 4, 0);
+			ptr = hex2mem(ptr, &regs->e4, 4, 0);
+			ptr = hex2mem(ptr, &regs->e5, 4, 0);
+			ptr = hex2mem(ptr, &regs->e6, 4, 0);
+			ptr = hex2mem(ptr, &regs->e7, 4, 0);
+
+			ptr = hex2mem(ptr, &ssp, 4, 0);
+			ptr = hex2mem(ptr, &zero, 4, 0);
+			ptr = hex2mem(ptr, &regs->sp, 4, 0);
+			ptr = hex2mem(ptr, &regs->mcrh, 4, 0);	/* 26 */
+			ptr = hex2mem(ptr, &regs->mcrl, 4, 0);
+			ptr = hex2mem(ptr, &regs->mcvf, 4, 0);
+
+			ptr = hex2mem(ptr, &zero, 4, 0);	/* 29 - FPCR */
+			ptr = hex2mem(ptr, &zero, 4, 0);
+			ptr = hex2mem(ptr, &zero, 4, 0);
+			for (loop = 0; loop < 32; loop++)     /* 32 - FS0-31 */
+				ptr = hex2mem(ptr, &zero, 4, 0);
+
+#if 0
+			/*
+			 * See if the stack pointer has moved. If so, then copy
+			 * the saved locals and ins to the new location.
+			 */
+			unsigned long *newsp = (unsigned long *) registers[SP];
+			if (sp != newsp)
+				sp = memcpy(newsp, sp, 16 * 4);
+#endif
+
+			gdbstub_strcpy(output_buffer, "OK");
+		}
+		break;
+
+		/*
+		 * mAA..AA,LLLL  Read LLLL bytes at address AA..AA
+		 */
+		case 'm':
+			ptr = &input_buffer[1];
+
+			if (hexToInt(&ptr, &addr) &&
+			    *ptr++ == ',' &&
+			    hexToInt(&ptr, &length)
+			    ) {
+				if (mem2hex((char *) addr, output_buffer,
+					    length, 1))
+					break;
+				gdbstub_strcpy(output_buffer, "E03");
+			} else {
+				gdbstub_strcpy(output_buffer, "E01");
+			}
+			break;
+
+			/*
+			 * MAA..AA,LLLL: Write LLLL bytes at address AA.AA
+			 * return OK
+			 */
+		case 'M':
+			ptr = &input_buffer[1];
+
+			if (hexToInt(&ptr, &addr) &&
+			    *ptr++ == ',' &&
+			    hexToInt(&ptr, &length) &&
+			    *ptr++ == ':'
+			    ) {
+				if (hex2mem(ptr, (char *) addr, length, 1))
+					gdbstub_strcpy(output_buffer, "OK");
+				else
+					gdbstub_strcpy(output_buffer, "E03");
+
+				gdbstub_flush_caches = 1;
+			} else {
+				gdbstub_strcpy(output_buffer, "E02");
+			}
+			break;
+
+			/*
+			 * cAA..AA    Continue at address AA..AA(optional)
+			 */
+		case 'c':
+			/* try to read optional parameter, pc unchanged if no
+			 * parm */
+
+			ptr = &input_buffer[1];
+			if (hexToInt(&ptr, &addr))
+				regs->pc = addr;
+			goto done;
+
+			/*
+			 * kill the program
+			 */
+		case 'k' :
+			goto done;	/* just continue */
+
+			/*
+			 * Reset the whole machine (FIXME: system dependent)
+			 */
+		case 'r':
+			break;
+
+			/*
+			 * Step to next instruction
+			 */
+		case 's':
+			/*
+			 * using the T flag doesn't seem to perform single
+			 * stepping (it seems to wind up being caught by the
+			 * JTAG unit), so we have to use breakpoints and
+			 * continue instead.
+			 */
+			if (gdbstub_single_step(regs) < 0)
+				/* ignore any fault error for now */
+				gdbstub_printk("unable to set single-step"
+					       " bp\n");
+			goto done;
+
+			/*
+			 * Set baud rate (bBB)
+			 */
+		case 'b':
+			do {
+				int baudrate;
+
+				ptr = &input_buffer[1];
+				if (!hexToInt(&ptr, &baudrate)) {
+					gdbstub_strcpy(output_buffer, "B01");
+					break;
+				}
+
+				if (baudrate) {
+					/* ACK before changing speed */
+					putpacket("OK");
+					gdbstub_io_set_baud(baudrate);
+				}
+			} while (0);
+			break;
+
+			/*
+			 * Set breakpoint
+			 */
+		case 'Z':
+			ptr = &input_buffer[1];
+
+			if (!hexToInt(&ptr, &loop) || *ptr++ != ',' ||
+			    !hexToInt(&ptr, &addr) || *ptr++ != ',' ||
+			    !hexToInt(&ptr, &length)
+			    ) {
+				gdbstub_strcpy(output_buffer, "E01");
+				break;
+			}
+
+			/* only support software breakpoints */
+			gdbstub_strcpy(output_buffer, "E03");
+			if (loop != 0 ||
+			    length < 1 ||
+			    length > 7 ||
+			    (unsigned long) addr < 4096)
+				break;
+
+			if (gdbstub_set_breakpoint((u8 *) addr, length) < 0)
+				break;
+
+			gdbstub_strcpy(output_buffer, "OK");
+			break;
+
+			/*
+			 * Clear breakpoint
+			 */
+		case 'z':
+			ptr = &input_buffer[1];
+
+			if (!hexToInt(&ptr, &loop) || *ptr++ != ',' ||
+			    !hexToInt(&ptr, &addr) || *ptr++ != ',' ||
+			    !hexToInt(&ptr, &length)
+			    ) {
+				gdbstub_strcpy(output_buffer, "E01");
+				break;
+			}
+
+			/* only support software breakpoints */
+			gdbstub_strcpy(output_buffer, "E03");
+			if (loop != 0 ||
+			    length < 1 ||
+			    length > 7 ||
+			    (unsigned long) addr < 4096)
+				break;
+
+			if (gdbstub_clear_breakpoint((u8 *) addr, length) < 0)
+				break;
+
+			gdbstub_strcpy(output_buffer, "OK");
+			break;
+
+		default:
+			gdbstub_proto("### GDB Unsupported Cmd '%s'\n",
+				      input_buffer);
+			break;
+		}
+
+		/* reply to the request */
+		putpacket(output_buffer);
+	}
+
+done:
+	/*
+	 * Need to flush the instruction cache here, as we may
+	 * have deposited a breakpoint, and the icache probably
+	 * has no way of knowing that a data ref to some location
+	 * may have changed something that is in the instruction
+	 * cache.
+	 * NB: We flush both caches, just to be sure...
+	 */
+	if (gdbstub_flush_caches)
+		gdbstub_purge_cache();
+
+	gdbstub_load_fpu();
+	mn10300_set_gdbleds(0);
+	if (excep == EXCEP_NMI)
+		NMICR = NMICR_NMIF;
+
+	touch_softlockup_watchdog();
+
+	local_irq_restore(epsw);
+	return 1;
+}
+
+/*
+ * handle event interception
+ */
+asmlinkage int gdbstub_intercept(struct pt_regs *regs,
+				 enum exception_code excep)
+{
+	static u8 notfirst = 1;
+	int ret;
+
+	if (gdbstub_busy)
+		gdbstub_printk("--> gdbstub reentered itself\n");
+	gdbstub_busy = 1;
+
+	if (notfirst) {
+		unsigned long mdr;
+		asm("mov mdr,%0" : "=d"(mdr));
+
+		gdbstub_entry(
+			"--> gdbstub_intercept(%p,%04x) [MDR=%lx PC=%lx]\n",
+			regs, excep, mdr, regs->pc);
+
+		gdbstub_entry(
+			"PC:  %08lx EPSW:  %08lx  SSP: %08lx mode: %s\n",
+			regs->pc, regs->epsw, (unsigned long) &ret,
+			user_mode(regs) ? "User" : "Super");
+		gdbstub_entry(
+			"d0:  %08lx   d1:  %08lx   d2: %08lx   d3: %08lx\n",
+			regs->d0, regs->d1, regs->d2, regs->d3);
+		gdbstub_entry(
+			"a0:  %08lx   a1:  %08lx   a2: %08lx   a3: %08lx\n",
+			regs->a0, regs->a1, regs->a2, regs->a3);
+		gdbstub_entry(
+			"e0:  %08lx   e1:  %08lx   e2: %08lx   e3: %08lx\n",
+			regs->e0, regs->e1, regs->e2, regs->e3);
+		gdbstub_entry(
+			"e4:  %08lx   e5:  %08lx   e6: %08lx   e7: %08lx\n",
+			regs->e4, regs->e5, regs->e6, regs->e7);
+		gdbstub_entry(
+			"lar: %08lx   lir: %08lx  mdr: %08lx  usp: %08lx\n",
+			regs->lar, regs->lir, regs->mdr, regs->sp);
+		gdbstub_entry(
+			"cvf: %08lx   crl: %08lx  crh: %08lx  drq: %08lx\n",
+			regs->mcvf, regs->mcrl, regs->mcrh, regs->mdrq);
+		gdbstub_entry(
+			"threadinfo=%p task=%p)\n",
+			current_thread_info(), current);
+	} else {
+		notfirst = 1;
+	}
+
+	ret = gdbstub(regs, excep);
+
+	gdbstub_entry("<-- gdbstub_intercept()\n");
+	gdbstub_busy = 0;
+	return ret;
+}
+
+/*
+ * handle the GDB stub itself causing an exception
+ */
+asmlinkage void gdbstub_exception(struct pt_regs *regs,
+				  enum exception_code excep)
+{
+	unsigned long mdr;
+
+	asm("mov mdr,%0" : "=d"(mdr));
+	gdbstub_entry("--> gdbstub exception({%p},%04x) [MDR=%lx]\n",
+		      regs, excep, mdr);
+
+	while ((unsigned long) regs == 0xffffffff) {}
+
+	/* handle guarded memory accesses where we know it might fault */
+	if (regs->pc == (unsigned) gdbstub_read_byte_guard) {
+		regs->pc = (unsigned) gdbstub_read_byte_cont;
+		goto fault;
+	}
+
+	if (regs->pc == (unsigned) gdbstub_read_word_guard) {
+		regs->pc = (unsigned) gdbstub_read_word_cont;
+		goto fault;
+	}
+
+	if (regs->pc == (unsigned) gdbstub_read_dword_guard) {
+		regs->pc = (unsigned) gdbstub_read_dword_cont;
+		goto fault;
+	}
+
+	if (regs->pc == (unsigned) gdbstub_write_byte_guard) {
+		regs->pc = (unsigned) gdbstub_write_byte_cont;
+		goto fault;
+	}
+
+	if (regs->pc == (unsigned) gdbstub_write_word_guard) {
+		regs->pc = (unsigned) gdbstub_write_word_cont;
+		goto fault;
+	}
+
+	if (regs->pc == (unsigned) gdbstub_write_dword_guard) {
+		regs->pc = (unsigned) gdbstub_write_dword_cont;
+		goto fault;
+	}
+
+	gdbstub_printk("\n### GDB stub caused an exception ###\n");
+
+	/* something went horribly wrong */
+	console_verbose();
+	show_registers(regs);
+
+	panic("GDB Stub caused an unexpected exception - can't continue\n");
+
+	/* we caught an attempt by the stub to access silly memory */
+fault:
+	gdbstub_entry("<-- gdbstub exception() = EFAULT\n");
+	regs->d0 = -EFAULT;
+	return;
+}
+
+/*
+ * send an exit message to GDB
+ */
+void gdbstub_exit(int status)
+{
+	unsigned char checksum;
+	unsigned char ch;
+	int count;
+
+	gdbstub_busy = 1;
+	output_buffer[0] = 'W';
+	output_buffer[1] = hexchars[(status >> 4) & 0x0F];
+	output_buffer[2] = hexchars[status & 0x0F];
+	output_buffer[3] = 0;
+
+	gdbstub_io_tx_char('$');
+	checksum = 0;
+	count = 0;
+
+	while ((ch = output_buffer[count]) != 0) {
+		gdbstub_io_tx_char(ch);
+		checksum += ch;
+		count += 1;
+	}
+
+	gdbstub_io_tx_char('#');
+	gdbstub_io_tx_char(hexchars[checksum >> 4]);
+	gdbstub_io_tx_char(hexchars[checksum & 0xf]);
+
+	/* make sure the output is flushed, or else RedBoot might clobber it */
+	gdbstub_io_tx_flush();
+
+	gdbstub_busy = 0;
+}
+
+/*
+ * initialise the GDB stub
+ */
+asmlinkage void __init gdbstub_init(void)
+{
+#ifdef CONFIG_GDBSTUB_IMMEDIATE
+	unsigned char ch;
+	int ret;
+#endif
+
+	gdbstub_busy = 1;
+
+	printk(KERN_INFO "%s", gdbstub_banner);
+
+	gdbstub_io_init();
+
+	gdbstub_entry("--> gdbstub_init\n");
+
+	/* try to talk to GDB (or anyone insane enough to want to type GDB
+	 * protocol by hand) */
+	gdbstub_io("### GDB Tx ACK\n");
+	gdbstub_io_tx_char('+'); /* 'hello world' */
+
+#ifdef CONFIG_GDBSTUB_IMMEDIATE
+	gdbstub_printk("GDB Stub waiting for packet\n");
+
+	/* in case GDB is started before us, ACK any packets that are already
+	 * sitting there (presumably "$?#xx")
+	 */
+	do { gdbstub_io_rx_char(&ch, 0); } while (ch != '$');
+	do { gdbstub_io_rx_char(&ch, 0); } while (ch != '#');
+	/* eat first csum byte */
+	do { ret = gdbstub_io_rx_char(&ch, 0); } while (ret != 0);
+	/* eat second csum byte */
+	do { ret = gdbstub_io_rx_char(&ch, 0); } while (ret != 0);
+
+	gdbstub_io("### GDB Tx NAK\n");
+	gdbstub_io_tx_char('-'); /* NAK it */
+
+#else
+	printk("GDB Stub ready\n");
+#endif
+
+	gdbstub_busy = 0;
+	gdbstub_entry("<-- gdbstub_init\n");
+}
+
+/*
+ * register the console at a more appropriate time
+ */
+#ifdef CONFIG_GDBSTUB_CONSOLE
+static int __init gdbstub_postinit(void)
+{
+	printk(KERN_NOTICE "registering console\n");
+	register_console(&gdbstub_console);
+	return 0;
+}
+
+__initcall(gdbstub_postinit);
+#endif
+
+/*
+ * handle character reception on GDB serial port
+ * - jump into the GDB stub if BREAK is detected on the serial line
+ */
+asmlinkage void gdbstub_rx_irq(struct pt_regs *regs, enum exception_code excep)
+{
+	char ch;
+	int ret;
+
+	gdbstub_entry("--> gdbstub_rx_irq\n");
+
+	do {
+		ret = gdbstub_io_rx_char(&ch, 1);
+		if (ret != -EIO && ret != -EAGAIN) {
+			if (ret != -EINTR)
+				gdbstub_rx_unget = ch;
+			gdbstub(regs, excep);
+		}
+	} while (ret != -EAGAIN);
+
+	gdbstub_entry("<-- gdbstub_rx_irq\n");
+}
diff --git a/arch/mn10300/kernel/head.S b/arch/mn10300/kernel/head.S
new file mode 100644
index 000000000000..606bd8c6758d
--- /dev/null
+++ b/arch/mn10300/kernel/head.S
@@ -0,0 +1,255 @@
+/* Boot entry point for MN10300 kernel
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <linux/linkage.h>
+#include <linux/serial_reg.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/frame.inc>
+#include <asm/param.h>
+#include <asm/unit/serial.h>
+
+	.section .text.head,"ax"
+
+###############################################################################
+#
+# bootloader entry point
+#
+###############################################################################
+	.globl	_start
+	.type	_start,@function
+_start:
+	# save commandline pointer
+	mov	d0,a3
+
+	# preload the PGD pointer register
+	mov	swapper_pg_dir,d0
+	mov	d0,(PTBR)
+
+	# turn on the TLBs
+	mov	MMUCTR_IIV|MMUCTR_DIV,d0
+	mov	d0,(MMUCTR)
+	mov	MMUCTR_ITE|MMUCTR_DTE|MMUCTR_CE,d0
+	mov	d0,(MMUCTR)
+
+	# turn on AM33v2 exception handling mode and set the trap table base
+	movhu	(CPUP),d0
+	or	CPUP_EXM_AM33V2,d0
+	movhu	d0,(CPUP)
+	mov	CONFIG_INTERRUPT_VECTOR_BASE,d0
+	mov	d0,(TBR)
+
+	# invalidate and enable both of the caches
+	mov	CHCTR,a0
+	clr	d0
+	movhu	d0,(a0)					# turn off first
+	mov	CHCTR_ICINV|CHCTR_DCINV,d0
+	movhu	d0,(a0)
+	setlb
+	mov	(a0),d0
+	btst	CHCTR_ICBUSY|CHCTR_DCBUSY,d0		# wait till not busy
+	lne
+
+#ifndef CONFIG_MN10300_CACHE_DISABLED
+#ifdef CONFIG_MN10300_CACHE_WBACK
+#ifndef CONFIG_MN10300_CACHE_WBACK_NOWRALLOC
+	mov	CHCTR_ICEN|CHCTR_DCEN|CHCTR_DCWTMD_WRBACK,d0
+#else
+	mov	CHCTR_ICEN|CHCTR_DCEN|CHCTR_DCWTMD_WRBACK|CHCTR_DCALMD,d0
+#endif /* CACHE_DISABLED */
+#else
+	mov	CHCTR_ICEN|CHCTR_DCEN|CHCTR_DCWTMD_WRTHROUGH,d0
+#endif /* WBACK */
+	movhu	d0,(a0)					# enable
+#endif /* NOWRALLOC */
+
+	# turn on RTS on the debug serial port if applicable
+#ifdef CONFIG_MN10300_UNIT_ASB2305
+	bset	UART_MCR_RTS,(ASB2305_DEBUG_MCR)
+#endif
+
+	# clear the BSS area
+	mov	__bss_start,a0
+	mov	__bss_stop,a1
+	clr	d0
+bssclear:
+	cmp	a1,a0
+	bge	bssclear_end
+	mov	d0,(a0)
+	inc4	a0
+	bra	bssclear
+bssclear_end:
+
+	# retrieve the parameters (including command line) before we overwrite
+	# them
+	cmp	0xabadcafe,d1
+	bne	__no_parameters
+
+__copy_parameters:
+	mov	redboot_command_line,a0
+	mov	a0,a1
+	add	COMMAND_LINE_SIZE,a1
+1:
+	movbu	(a3),d0
+	inc	a3
+	movbu	d0,(a0)
+	inc	a0
+	cmp	a1,a0
+	blt	1b
+
+	mov	redboot_platform_name,a0
+	mov	a0,a1
+	add	COMMAND_LINE_SIZE,a1
+	mov	d2,a3
+1:
+	movbu	(a3),d0
+	inc	a3
+	movbu	d0,(a0)
+	inc	a0
+	cmp	a1,a0
+	blt	1b
+
+__no_parameters:
+
+	# set up the registers with recognisable rubbish in them
+	mov	init_thread_union+THREAD_SIZE-12,sp
+
+	mov	0xea01eaea,d0
+	mov	d0,(4,sp)		# EPSW save area
+	mov	0xea02eaea,d0
+	mov	d0,(8,sp)		# PC save area
+
+	mov	0xeb0060ed,d0
+	mov	d0,mdr
+	mov	0xeb0061ed,d0
+	mov	d0,mdrq
+	mov	0xeb0062ed,d0
+	mov	d0,mcrh
+	mov	0xeb0063ed,d0
+	mov	d0,mcrl
+	mov	0xeb0064ed,d0
+	mov	d0,mcvf
+	mov	0xed0065ed,a3
+	mov	a3,usp
+
+	mov	0xed00e0ed,e0
+	mov	0xed00e1ed,e1
+	mov	0xed00e2ed,e2
+	mov	0xed00e3ed,e3
+	mov	0xed00e4ed,e4
+	mov	0xed00e5ed,e5
+	mov	0xed00e6ed,e6
+	mov	0xed00e7ed,e7
+
+	mov	0xed00d0ed,d0
+	mov	0xed00d1ed,d1
+	mov	0xed00d2ed,d2
+	mov	0xed00d3ed,d3
+	mov	0xed00a0ed,a0
+	mov	0xed00a1ed,a1
+	mov	0xed00a2ed,a2
+	mov	0,a3
+
+	# set up the initial kernel stack
+	SAVE_ALL
+	mov	0xffffffff,d0
+	mov	d0,(REG_ORIG_D0,fp)
+
+	# put different recognisable rubbish in the regs
+	mov	0xfb0060ed,d0
+	mov	d0,mdr
+	mov	0xfb0061ed,d0
+	mov	d0,mdrq
+	mov	0xfb0062ed,d0
+	mov	d0,mcrh
+	mov	0xfb0063ed,d0
+	mov	d0,mcrl
+	mov	0xfb0064ed,d0
+	mov	d0,mcvf
+	mov	0xfd0065ed,a0
+	mov	a0,usp
+
+	mov	0xfd00e0ed,e0
+	mov	0xfd00e1ed,e1
+	mov	0xfd00e2ed,e2
+	mov	0xfd00e3ed,e3
+	mov	0xfd00e4ed,e4
+	mov	0xfd00e5ed,e5
+	mov	0xfd00e6ed,e6
+	mov	0xfd00e7ed,e7
+
+	mov	0xfd00d0ed,d0
+	mov	0xfd00d1ed,d1
+	mov	0xfd00d2ed,d2
+	mov	0xfd00d3ed,d3
+	mov	0xfd00a0ed,a0
+	mov	0xfd00a1ed,a1
+	mov	0xfd00a2ed,a2
+
+	# we may be holding current in E2
+#ifdef CONFIG_MN10300_CURRENT_IN_E2
+	mov	init_task,e2
+#endif
+
+	# initialise the processor and the unit
+	call	processor_init[],0
+	call	unit_init[],0
+
+#ifdef CONFIG_GDBSTUB
+	call	gdbstub_init[],0
+
+#ifdef CONFIG_GDBSTUB_IMMEDIATE
+	.globl	__gdbstub_pause
+__gdbstub_pause:
+	bra	__gdbstub_pause
+#endif
+#endif
+
+	jmp	start_kernel
+	.size	_start, _start-.
+ENTRY(__head_end)
+
+/*
+ * This is initialized to disallow all access to the low 2G region
+ * - the high 2G region is managed directly by the MMU
+ * - range 0x70000000-0x7C000000 are initialised for use by VMALLOC
+ */
+	.section .bss
+	.balign PAGE_SIZE
+ENTRY(swapper_pg_dir)
+        .space PTRS_PER_PGD*4
+
+/*
+ * The page tables are initialized to only 8MB here - the final page
+ * tables are set up later depending on memory size.
+ */
+
+	.balign PAGE_SIZE
+ENTRY(empty_zero_page)
+	.space PAGE_SIZE
+
+	.balign PAGE_SIZE
+ENTRY(empty_bad_page)
+	.space PAGE_SIZE
+
+	.balign PAGE_SIZE
+ENTRY(empty_bad_pte_table)
+	.space PAGE_SIZE
+
+	.balign PAGE_SIZE
+ENTRY(large_page_table)
+	.space PAGE_SIZE
+
+	.balign PAGE_SIZE
+ENTRY(kernel_vmalloc_ptes)
+	.space ((VMALLOC_END-VMALLOC_START)/PAGE_SIZE)*4
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
new file mode 100644
index 000000000000..39fe6882dd1d
--- /dev/null
+++ b/arch/mn10300/kernel/init_task.c
@@ -0,0 +1,45 @@
+/* MN10300 Initial task definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union
+	__attribute__((__section__(".data.init_task"))) =
+		{ INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+EXPORT_SYMBOL(init_task);
diff --git a/arch/mn10300/kernel/internal.h b/arch/mn10300/kernel/internal.h
new file mode 100644
index 000000000000..eee2eee86267
--- /dev/null
+++ b/arch/mn10300/kernel/internal.h
@@ -0,0 +1,20 @@
+/* Internal definitions for the arch part of the core kernel
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+/*
+ * kthread.S
+ */
+extern int kernel_thread_helper(int);
+
+/*
+ * entry.S
+ */
+extern void ret_from_fork(struct task_struct *) __attribute__((noreturn));
diff --git a/arch/mn10300/kernel/io.c b/arch/mn10300/kernel/io.c
new file mode 100644
index 000000000000..e96fdf6bb542
--- /dev/null
+++ b/arch/mn10300/kernel/io.c
@@ -0,0 +1,30 @@
+/* MN10300 Misaligned multibyte-word I/O
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <asm/io.h>
+
+/*
+ * output data from a potentially misaligned buffer
+ */
+void __outsl(unsigned long addr, const void *buffer, int count)
+{
+	const unsigned char *buf = buffer;
+	unsigned long val;
+
+	while (count--) {
+		memcpy(&val, buf, 4);
+		outl(val, addr);
+		buf += 4;
+	}
+}
+EXPORT_SYMBOL(__outsl);
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
new file mode 100644
index 000000000000..761c434a2488
--- /dev/null
+++ b/arch/mn10300/kernel/irq.c
@@ -0,0 +1,235 @@
+/* MN10300 Arch-specific interrupt handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+#include <asm/setup.h>
+
+unsigned long __mn10300_irq_enabled_epsw = EPSW_IE | EPSW_IM_7;
+EXPORT_SYMBOL(__mn10300_irq_enabled_epsw);
+
+atomic_t irq_err_count;
+
+/*
+ * MN10300 INTC controller operations
+ */
+static void mn10300_cpupic_disable(unsigned int irq)
+{
+	u16 tmp = GxICR(irq);
+	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_DETECT;
+	tmp = GxICR(irq);
+}
+
+static void mn10300_cpupic_enable(unsigned int irq)
+{
+	u16 tmp = GxICR(irq);
+	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE;
+	tmp = GxICR(irq);
+}
+
+static void mn10300_cpupic_ack(unsigned int irq)
+{
+	u16 tmp;
+	*(volatile u8 *) &GxICR(irq) = GxICR_DETECT;
+	tmp = GxICR(irq);
+}
+
+static void mn10300_cpupic_mask(unsigned int irq)
+{
+	u16 tmp = GxICR(irq);
+	GxICR(irq) = (tmp & GxICR_LEVEL);
+	tmp = GxICR(irq);
+}
+
+static void mn10300_cpupic_mask_ack(unsigned int irq)
+{
+	u16 tmp = GxICR(irq);
+	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_DETECT;
+	tmp = GxICR(irq);
+}
+
+static void mn10300_cpupic_unmask(unsigned int irq)
+{
+	u16 tmp = GxICR(irq);
+	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE | GxICR_DETECT;
+	tmp = GxICR(irq);
+}
+
+static void mn10300_cpupic_end(unsigned int irq)
+{
+	u16 tmp = GxICR(irq);
+	GxICR(irq) = (tmp & GxICR_LEVEL) | GxICR_ENABLE;
+	tmp = GxICR(irq);
+}
+
+static struct irq_chip mn10300_cpu_pic = {
+	.name		= "cpu",
+	.disable	= mn10300_cpupic_disable,
+	.enable		= mn10300_cpupic_enable,
+	.ack		= mn10300_cpupic_ack,
+	.mask		= mn10300_cpupic_mask,
+	.mask_ack	= mn10300_cpupic_mask_ack,
+	.unmask		= mn10300_cpupic_unmask,
+	.end		= mn10300_cpupic_end,
+};
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(int irq)
+{
+	printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
+}
+
+/*
+ * change the level at which an IRQ executes
+ * - must not be called whilst interrupts are being processed!
+ */
+void set_intr_level(int irq, u16 level)
+{
+	u16 tmp;
+
+	if (in_interrupt())
+		BUG();
+
+	tmp = GxICR(irq);
+	GxICR(irq) = (tmp & GxICR_ENABLE) | level;
+	tmp = GxICR(irq);
+}
+
+/*
+ * mark an interrupt to be ACK'd after interrupt handlers have been run rather
+ * than before
+ * - see Documentation/mn10300/features.txt
+ */
+void set_intr_postackable(int irq)
+{
+	set_irq_handler(irq, handle_level_irq);
+}
+
+/*
+ * initialise the interrupt system
+ */
+void __init init_IRQ(void)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++)
+		if (irq_desc[irq].chip == &no_irq_type)
+			set_irq_chip_and_handler(irq, &mn10300_cpu_pic,
+						 handle_edge_irq);
+	unit_init_IRQ();
+}
+
+/*
+ * handle normal device IRQs
+ */
+asmlinkage void do_IRQ(void)
+{
+	unsigned long sp, epsw, irq_disabled_epsw, old_irq_enabled_epsw;
+	int irq;
+
+	sp = current_stack_pointer();
+	if (sp - (sp & ~(THREAD_SIZE - 1)) < STACK_WARN)
+		BUG();
+
+	/* make sure local_irq_enable() doesn't muck up the interrupt priority
+	 * setting in EPSW */
+	old_irq_enabled_epsw = __mn10300_irq_enabled_epsw;
+	local_save_flags(epsw);
+	__mn10300_irq_enabled_epsw = EPSW_IE | (EPSW_IM & epsw);
+	irq_disabled_epsw = EPSW_IE | MN10300_CLI_LEVEL;
+
+	__IRQ_STAT(smp_processor_id(), __irq_count)++;
+
+	irq_enter();
+
+	for (;;) {
+		/* ask the interrupt controller for the next IRQ to process
+		 * - the result we get depends on EPSW.IM
+		 */
+		irq = IAGR & IAGR_GN;
+		if (!irq)
+			break;
+
+		local_irq_restore(irq_disabled_epsw);
+
+		generic_handle_irq(irq >> 2);
+
+		/* restore IRQ controls for IAGR access */
+		local_irq_restore(epsw);
+	}
+
+	__mn10300_irq_enabled_epsw = old_irq_enabled_epsw;
+
+	irq_exit();
+}
+
+/*
+ * Display interrupt management information through /proc/interrupts
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i = *(loff_t *) v, j, cpu;
+	struct irqaction *action;
+	unsigned long flags;
+
+	switch (i) {
+		/* display column title bar naming CPUs */
+	case 0:
+		seq_printf(p, "           ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "CPU%d       ", j);
+		seq_putc(p, '\n');
+		break;
+
+		/* display information rows, one per active CPU */
+	case 1 ... NR_IRQS - 1:
+		spin_lock_irqsave(&irq_desc[i].lock, flags);
+
+		action = irq_desc[i].action;
+		if (action) {
+			seq_printf(p, "%3d: ", i);
+			for_each_present_cpu(cpu)
+				seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
+			seq_printf(p, " %14s.%u", irq_desc[i].chip->name,
+				   (GxICR(i) & GxICR_LEVEL) >>
+				   GxICR_LEVEL_SHIFT);
+			seq_printf(p, "  %s", action->name);
+
+			for (action = action->next;
+			     action;
+			     action = action->next)
+				seq_printf(p, ", %s", action->name);
+
+			seq_putc(p, '\n');
+		}
+
+		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+		break;
+
+		/* polish off with NMI and error counters */
+	case NR_IRQS:
+		seq_printf(p, "NMI: ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", nmi_count(j));
+		seq_putc(p, '\n');
+
+		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+		break;
+	}
+
+	return 0;
+}
diff --git a/arch/mn10300/kernel/kernel_execve.S b/arch/mn10300/kernel/kernel_execve.S
new file mode 100644
index 000000000000..86039f105268
--- /dev/null
+++ b/arch/mn10300/kernel/kernel_execve.S
@@ -0,0 +1,37 @@
+/* MN10300 In-kernel program execution
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+###############################################################################
+#
+# Do a system call from kernel instead of calling sys_execve so we end up with
+# proper pt_regs.
+#
+# int kernel_execve(const char *filename, char *const argv[],
+#		    char *const envp[])
+#
+# On entry: D0/D1/8(SP): arguments to function
+# On return: D0: syscall return.
+#
+###############################################################################
+	.globl		kernel_execve
+	.type		kernel_execve,@function
+kernel_execve:
+	mov		a3,a1
+	mov		d0,a0
+	mov		(12,sp),a3
+	mov		+__NR_execve,d0
+	syscall		0
+	mov		a1,a3
+	rets
+
+	.size		kernel_execve,.-kernel_execve
diff --git a/arch/mn10300/kernel/kprobes.c b/arch/mn10300/kernel/kprobes.c
new file mode 100644
index 000000000000..dacafab00eb2
--- /dev/null
+++ b/arch/mn10300/kernel/kprobes.c
@@ -0,0 +1,653 @@
+/* MN10300 Kernel probes implementation
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by Mark Salter (msalter@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public Licence as published by
+ * the Free Software Foundation; either version 2 of the Licence, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public Licence for more details.
+ *
+ * You should have received a copy of the GNU General Public Licence
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/spinlock.h>
+#include <linux/preempt.h>
+#include <linux/kdebug.h>
+#include <asm/cacheflush.h>
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = { { NULL, NULL } };
+const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
+
+/* kprobe_status settings */
+#define KPROBE_HIT_ACTIVE	0x00000001
+#define KPROBE_HIT_SS		0x00000002
+
+static struct kprobe *current_kprobe;
+static unsigned long current_kprobe_orig_pc;
+static unsigned long current_kprobe_next_pc;
+static int current_kprobe_ss_flags;
+static unsigned long kprobe_status;
+static kprobe_opcode_t current_kprobe_ss_buf[MAX_INSN_SIZE + 2];
+static unsigned long current_kprobe_bp_addr;
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+
+
+/* singlestep flag bits */
+#define SINGLESTEP_BRANCH 1
+#define SINGLESTEP_PCREL  2
+
+#define READ_BYTE(p, valp) \
+	do { *(u8 *)(valp) = *(u8 *)(p); } while (0)
+
+#define READ_WORD16(p, valp)					\
+	do {							\
+		READ_BYTE((p), (valp));				\
+		READ_BYTE((u8 *)(p) + 1, (u8 *)(valp) + 1);	\
+	} while (0)
+
+#define READ_WORD32(p, valp)					\
+	do {							\
+		READ_BYTE((p), (valp));				\
+		READ_BYTE((u8 *)(p) + 1, (u8 *)(valp) + 1);	\
+		READ_BYTE((u8 *)(p) + 2, (u8 *)(valp) + 2);	\
+		READ_BYTE((u8 *)(p) + 3, (u8 *)(valp) + 3);	\
+	} while (0)
+
+
+static const u8 mn10300_insn_sizes[256] =
+{
+	/* 1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+	1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3,	/* 0 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 1 */
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, /* 2 */
+	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, /* 3 */
+	1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, /* 4 */
+	1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, /* 5 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6 */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 8 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* 9 */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* a */
+	2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, /* b */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 2, /* c */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* e */
+	0, 2, 2, 2, 2, 2, 2, 4, 0, 3, 0, 4, 0, 6, 7, 1  /* f */
+};
+
+#define LT (1 << 0)
+#define GT (1 << 1)
+#define GE (1 << 2)
+#define LE (1 << 3)
+#define CS (1 << 4)
+#define HI (1 << 5)
+#define CC (1 << 6)
+#define LS (1 << 7)
+#define EQ (1 << 8)
+#define NE (1 << 9)
+#define RA (1 << 10)
+#define VC (1 << 11)
+#define VS (1 << 12)
+#define NC (1 << 13)
+#define NS (1 << 14)
+
+static const u16 cond_table[] = {
+	/*  V  C  N  Z  */
+	/*  0  0  0  0  */ (NE | NC | CC | VC | GE | GT | HI),
+	/*  0  0  0  1  */ (EQ | NC | CC | VC | GE | LE | LS),
+	/*  0  0  1  0  */ (NE | NS | CC | VC | LT | LE | HI),
+	/*  0  0  1  1  */ (EQ | NS | CC | VC | LT | LE | LS),
+	/*  0  1  0  0  */ (NE | NC | CS | VC | GE | GT | LS),
+	/*  0  1  0  1  */ (EQ | NC | CS | VC | GE | LE | LS),
+	/*  0  1  1  0  */ (NE | NS | CS | VC | LT | LE | LS),
+	/*  0  1  1  1  */ (EQ | NS | CS | VC | LT | LE | LS),
+	/*  1  0  0  0  */ (NE | NC | CC | VS | LT | LE | HI),
+	/*  1  0  0  1  */ (EQ | NC | CC | VS | LT | LE | LS),
+	/*  1  0  1  0  */ (NE | NS | CC | VS | GE | GT | HI),
+	/*  1  0  1  1  */ (EQ | NS | CC | VS | GE | LE | LS),
+	/*  1  1  0  0  */ (NE | NC | CS | VS | LT | LE | LS),
+	/*  1  1  0  1  */ (EQ | NC | CS | VS | LT | LE | LS),
+	/*  1  1  1  0  */ (NE | NS | CS | VS | GE | GT | LS),
+	/*  1  1  1  1  */ (EQ | NS | CS | VS | GE | LE | LS),
+};
+
+/*
+ * Calculate what the PC will be after executing next instruction
+ */
+static unsigned find_nextpc(struct pt_regs *regs, int *flags)
+{
+	unsigned size;
+	s8  x8;
+	s16 x16;
+	s32 x32;
+	u8 opc, *pc, *sp, *next;
+
+	next = 0;
+	*flags = SINGLESTEP_PCREL;
+
+	pc = (u8 *) regs->pc;
+	sp = (u8 *) (regs + 1);
+	opc = *pc;
+
+	size = mn10300_insn_sizes[opc];
+	if (size > 0) {
+		next = pc + size;
+	} else {
+		switch (opc) {
+			/* Bxx (d8,PC) */
+		case 0xc0 ... 0xca:
+			x8 = 2;
+			if (cond_table[regs->epsw & 0xf] & (1 << (opc & 0xf)))
+				x8 = (s8)pc[1];
+			next = pc + x8;
+			*flags |= SINGLESTEP_BRANCH;
+			break;
+
+			/* JMP (d16,PC) or CALL (d16,PC) */
+		case 0xcc:
+		case 0xcd:
+			READ_WORD16(pc + 1, &x16);
+			next = pc + x16;
+			*flags |= SINGLESTEP_BRANCH;
+			break;
+
+			/* JMP (d32,PC) or CALL (d32,PC) */
+		case 0xdc:
+		case 0xdd:
+			READ_WORD32(pc + 1, &x32);
+			next = pc + x32;
+			*flags |= SINGLESTEP_BRANCH;
+			break;
+
+			/* RETF */
+		case 0xde:
+			next = (u8 *)regs->mdr;
+			*flags &= ~SINGLESTEP_PCREL;
+			*flags |= SINGLESTEP_BRANCH;
+			break;
+
+			/* RET */
+		case 0xdf:
+			sp += pc[2];
+			READ_WORD32(sp, &x32);
+			next = (u8 *)x32;
+			*flags &= ~SINGLESTEP_PCREL;
+			*flags |= SINGLESTEP_BRANCH;
+			break;
+
+		case 0xf0:
+			next = pc + 2;
+			opc = pc[1];
+			if (opc >= 0xf0 && opc <= 0xf7) {
+				/* JMP (An) / CALLS (An) */
+				switch (opc & 3) {
+				case 0:
+					next = (u8 *)regs->a0;
+					break;
+				case 1:
+					next = (u8 *)regs->a1;
+					break;
+				case 2:
+					next = (u8 *)regs->a2;
+					break;
+				case 3:
+					next = (u8 *)regs->a3;
+					break;
+				}
+				*flags &= ~SINGLESTEP_PCREL;
+				*flags |= SINGLESTEP_BRANCH;
+			} else if (opc == 0xfc) {
+				/* RETS */
+				READ_WORD32(sp, &x32);
+				next = (u8 *)x32;
+				*flags &= ~SINGLESTEP_PCREL;
+				*flags |= SINGLESTEP_BRANCH;
+			} else if (opc == 0xfd) {
+				/* RTI */
+				READ_WORD32(sp + 4, &x32);
+				next = (u8 *)x32;
+				*flags &= ~SINGLESTEP_PCREL;
+				*flags |= SINGLESTEP_BRANCH;
+			}
+			break;
+
+			/* potential 3-byte conditional branches */
+		case 0xf8:
+			next = pc + 3;
+			opc = pc[1];
+			if (opc >= 0xe8 && opc <= 0xeb &&
+			    (cond_table[regs->epsw & 0xf] &
+			     (1 << ((opc & 0xf) + 3)))
+			    ) {
+				READ_BYTE(pc+2, &x8);
+				next = pc + x8;
+				*flags |= SINGLESTEP_BRANCH;
+			}
+			break;
+
+		case 0xfa:
+			if (pc[1] == 0xff) {
+				/* CALLS (d16,PC) */
+				READ_WORD16(pc + 2, &x16);
+				next = pc + x16;
+			} else
+				next = pc + 4;
+			*flags |= SINGLESTEP_BRANCH;
+			break;
+
+		case 0xfc:
+			x32 = 6;
+			if (pc[1] == 0xff) {
+				/* CALLS (d32,PC) */
+				READ_WORD32(pc + 2, &x32);
+			}
+			next = pc + x32;
+			*flags |= SINGLESTEP_BRANCH;
+			break;
+			/* LXX (d8,PC) */
+			/* SETLB - loads the next four bytes into the LIR reg */
+		case 0xd0 ... 0xda:
+		case 0xdb:
+			panic("Can't singlestep Lxx/SETLB\n");
+			break;
+		}
+	}
+	return (unsigned)next;
+
+}
+
+/*
+ * set up out of place singlestep of some branching instructions
+ */
+static unsigned __kprobes singlestep_branch_setup(struct pt_regs *regs)
+{
+	u8 opc, *pc, *sp, *next;
+
+	next = NULL;
+	pc = (u8 *) regs->pc;
+	sp = (u8 *) (regs + 1);
+
+	switch (pc[0]) {
+	case 0xc0 ... 0xca:	/* Bxx (d8,PC) */
+	case 0xcc:		/* JMP (d16,PC) */
+	case 0xdc:		/* JMP (d32,PC) */
+	case 0xf8:              /* Bxx (d8,PC)  3-byte version */
+		/* don't really need to do anything except cause trap  */
+		next = pc;
+		break;
+
+	case 0xcd:		/* CALL (d16,PC) */
+		pc[1] = 5;
+		pc[2] = 0;
+		next = pc + 5;
+		break;
+
+	case 0xdd:		/* CALL (d32,PC) */
+		pc[1] = 7;
+		pc[2] = 0;
+		pc[3] = 0;
+		pc[4] = 0;
+		next = pc + 7;
+		break;
+
+	case 0xde:		/* RETF */
+		next = pc + 3;
+		regs->mdr = (unsigned) next;
+		break;
+
+	case 0xdf:		/* RET */
+		sp += pc[2];
+		next = pc + 3;
+		*(unsigned *)sp = (unsigned) next;
+		break;
+
+	case 0xf0:
+		next = pc + 2;
+		opc = pc[1];
+		if (opc >= 0xf0 && opc <= 0xf3) {
+			/* CALLS (An) */
+			/* use CALLS (d16,PC) to avoid mucking with An */
+			pc[0] = 0xfa;
+			pc[1] = 0xff;
+			pc[2] = 4;
+			pc[3] = 0;
+			next = pc + 4;
+		} else if (opc >= 0xf4 && opc <= 0xf7) {
+			/* JMP (An) */
+			next = pc;
+		} else if (opc == 0xfc) {
+			/* RETS */
+			next = pc + 2;
+			*(unsigned *) sp = (unsigned) next;
+		} else if (opc == 0xfd) {
+			/* RTI */
+			next = pc + 2;
+			*(unsigned *)(sp + 4) = (unsigned) next;
+		}
+		break;
+
+	case 0xfa:	/* CALLS (d16,PC) */
+		pc[2] = 4;
+		pc[3] = 0;
+		next = pc + 4;
+		break;
+
+	case 0xfc:	/* CALLS (d32,PC) */
+		pc[2] = 6;
+		pc[3] = 0;
+		pc[4] = 0;
+		pc[5] = 0;
+		next = pc + 6;
+		break;
+
+	case 0xd0 ... 0xda:	/* LXX (d8,PC) */
+	case 0xdb:		/* SETLB */
+		panic("Can't singlestep Lxx/SETLB\n");
+	}
+
+	return (unsigned) next;
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	return 0;
+}
+
+void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long) p->addr,
+			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	mn10300_dcache_flush();
+	mn10300_icache_inv();
+}
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+static inline
+void __kprobes disarm_kprobe(struct kprobe *p, struct pt_regs *regs)
+{
+	*p->addr = p->opcode;
+	regs->pc = (unsigned long) p->addr;
+	mn10300_dcache_flush();
+	mn10300_icache_inv();
+}
+
+static inline
+void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long nextpc;
+
+	current_kprobe_orig_pc = regs->pc;
+	memcpy(current_kprobe_ss_buf, &p->ainsn.insn[0], MAX_INSN_SIZE);
+	regs->pc = (unsigned long) current_kprobe_ss_buf;
+
+	nextpc = find_nextpc(regs, &current_kprobe_ss_flags);
+	if (current_kprobe_ss_flags & SINGLESTEP_PCREL)
+		current_kprobe_next_pc =
+			current_kprobe_orig_pc + (nextpc - regs->pc);
+	else
+		current_kprobe_next_pc = nextpc;
+
+	/* branching instructions need special handling */
+	if (current_kprobe_ss_flags & SINGLESTEP_BRANCH)
+		nextpc = singlestep_branch_setup(regs);
+
+	current_kprobe_bp_addr = nextpc;
+
+	*(u8 *) nextpc = BREAKPOINT_INSTRUCTION;
+	mn10300_dcache_flush_range2((unsigned) current_kprobe_ss_buf,
+				    sizeof(current_kprobe_ss_buf));
+	mn10300_icache_inv();
+}
+
+static inline int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	unsigned int *addr = (unsigned int *) regs->pc;
+
+	/* We're in an interrupt, but this is clear and BUG()-safe. */
+	preempt_disable();
+
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		/* We *are* holding lock here, so this is safe.
+		   Disarm the probe we just hit, and ignore it. */
+		p = get_kprobe(addr);
+		if (p) {
+			disarm_kprobe(p, regs);
+			ret = 1;
+		} else {
+			p = current_kprobe;
+			if (p->break_handler && p->break_handler(p, regs))
+				goto ss_probe;
+		}
+		/* If it's not ours, can't be delete race, (we hold lock). */
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		if (*addr != BREAKPOINT_INSTRUCTION) {
+			/* The breakpoint instruction was removed right after
+			 * we hit it.  Another cpu has removed either a
+			 * probepoint or a debugger breakpoint at this address.
+			 * In either case, no further handling of this
+			 * interrupt is appropriate.
+			 */
+			ret = 1;
+		}
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	kprobe_status = KPROBE_HIT_ACTIVE;
+	current_kprobe = p;
+	if (p->pre_handler(p, regs)) {
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+	}
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	/* we may need to fixup regs/stack after singlestepping a call insn */
+	if (current_kprobe_ss_flags & SINGLESTEP_BRANCH) {
+		regs->pc = current_kprobe_orig_pc;
+		switch (p->ainsn.insn[0]) {
+		case 0xcd:	/* CALL (d16,PC) */
+			*(unsigned *) regs->sp = regs->mdr = regs->pc + 5;
+			break;
+		case 0xdd:	/* CALL (d32,PC) */
+			/* fixup mdr and return address on stack */
+			*(unsigned *) regs->sp = regs->mdr = regs->pc + 7;
+			break;
+		case 0xf0:
+			if (p->ainsn.insn[1] >= 0xf0 &&
+			    p->ainsn.insn[1] <= 0xf3) {
+				/* CALLS (An) */
+				/* fixup MDR and return address on stack */
+				regs->mdr = regs->pc + 2;
+				*(unsigned *) regs->sp = regs->mdr;
+			}
+			break;
+
+		case 0xfa:	/* CALLS (d16,PC) */
+			/* fixup MDR and return address on stack */
+			*(unsigned *) regs->sp = regs->mdr = regs->pc + 4;
+			break;
+
+		case 0xfc:	/* CALLS (d32,PC) */
+			/* fixup MDR and return address on stack */
+			*(unsigned *) regs->sp = regs->mdr = regs->pc + 6;
+			break;
+		}
+	}
+
+	regs->pc = current_kprobe_next_pc;
+	current_kprobe_bp_addr = 0;
+}
+
+static inline int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	if (!kprobe_running())
+		return 0;
+
+	if (current_kprobe->post_handler)
+		current_kprobe->post_handler(current_kprobe, regs, 0);
+
+	resume_execution(current_kprobe, regs);
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+	return 1;
+}
+
+/* Interrupts disabled, kprobe_lock held. */
+static inline
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	if (current_kprobe->fault_handler &&
+	    current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+		return 1;
+
+	if (kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(current_kprobe, regs);
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = data;
+
+	switch (val) {
+	case DIE_BREAKPOINT:
+		if (current_kprobe_bp_addr != args->regs->pc) {
+			if (kprobe_handler(args->regs))
+				return NOTIFY_STOP;
+		} else {
+			if (post_kprobe_handler(args->regs))
+				return NOTIFY_STOP;
+		}
+		break;
+	case DIE_GPF:
+		if (kprobe_running() &&
+		    kprobe_fault_handler(args->regs, args->trapnr))
+			return NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/* Jprobes support.  */
+static struct pt_regs jprobe_saved_regs;
+static struct pt_regs *jprobe_saved_regs_location;
+static kprobe_opcode_t jprobe_saved_stack[MAX_STACK_SIZE];
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+
+	jprobe_saved_regs_location = regs;
+	memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+	/* Save a whole stack frame, this gets arguments
+	 * pushed onto the stack after using up all the
+	 * arg registers.
+	 */
+	memcpy(&jprobe_saved_stack, regs + 1, sizeof(jprobe_saved_stack));
+
+	/* setup return addr to the jprobe handler routine */
+	regs->pc = (unsigned long) jp->entry;
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	void *orig_sp = jprobe_saved_regs_location + 1;
+
+	preempt_enable_no_resched();
+	asm volatile("		mov	%0,sp\n"
+		     ".globl	jprobe_return_bp_addr\n"
+		     "jprobe_return_bp_addr:\n\t"
+		     "		.byte	0xff\n"
+		     : : "d" (orig_sp));
+}
+
+extern void jprobe_return_bp_addr(void);
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	u8 *addr = (u8 *) regs->pc;
+
+	if (addr == (u8 *) jprobe_return_bp_addr) {
+		if (jprobe_saved_regs_location != regs) {
+			printk(KERN_ERR"JPROBE:"
+			       " Current regs (%p) does not match saved regs"
+			       " (%p).\n",
+			       regs, jprobe_saved_regs_location);
+			BUG();
+		}
+
+		/* Restore old register state.
+		 */
+		memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs));
+
+		memcpy(regs + 1, &jprobe_saved_stack,
+		       sizeof(jprobe_saved_stack));
+		return 1;
+	}
+	return 0;
+}
+
+int __init arch_init_kprobes(void)
+{
+	return 0;
+}
diff --git a/arch/mn10300/kernel/kthread.S b/arch/mn10300/kernel/kthread.S
new file mode 100644
index 000000000000..b5ae467ac5ec
--- /dev/null
+++ b/arch/mn10300/kernel/kthread.S
@@ -0,0 +1,31 @@
+/* MN10300 Kernel thread trampoline function
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by Mark Salter (msalter@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+	.text
+
+###############################################################################
+#
+# kernel_thread_helper - trampoline for kernel_thread()
+#
+# On entry:
+#   A2 = address of function to call
+#   D2 = function argument
+#
+###############################################################################
+	.globl	kernel_thread_helper
+	.type	kernel_thread_helper,@function
+kernel_thread_helper:
+	mov	do_exit,d1
+	mov	d1,(sp)
+	mov	d1,mdr
+	mov	d2,d0
+	jmp	(a2)
+
+	.size	kernel_thread_helper,.-kernel_thread_helper
diff --git a/arch/mn10300/kernel/mn10300-debug.c b/arch/mn10300/kernel/mn10300-debug.c
new file mode 100644
index 000000000000..bd8196478cbc
--- /dev/null
+++ b/arch/mn10300/kernel/mn10300-debug.c
@@ -0,0 +1,58 @@
+/* Debugging stuff for the MN10300-based processors
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <asm/serial-regs.h>
+
+#undef MN10300_CONSOLE_ON_SERIO
+
+/*
+ * write a string directly through one of the serial ports on-board the MN10300
+ */
+#ifdef MN10300_CONSOLE_ON_SERIO
+void debug_to_serial_mnser(const char *p, int n)
+{
+	char ch;
+
+	for (; n > 0; n--) {
+		ch = *p++;
+
+#if MN10300_CONSOLE_ON_SERIO == 0
+		while (SC0STR & (SC01STR_TBF)) continue;
+		SC0TXB = ch;
+		while (SC0STR & (SC01STR_TBF)) continue;
+		if (ch == 0x0a) {
+			SC0TXB = 0x0d;
+			while (SC0STR & (SC01STR_TBF)) continue;
+		}
+
+#elif MN10300_CONSOLE_ON_SERIO == 1
+		while (SC1STR & (SC01STR_TBF)) continue;
+		SC1TXB = ch;
+		while (SC1STR & (SC01STR_TBF)) continue;
+		if (ch == 0x0a) {
+			SC1TXB = 0x0d;
+			while (SC1STR & (SC01STR_TBF)) continue;
+		}
+
+#elif MN10300_CONSOLE_ON_SERIO == 2
+		while (SC2STR & (SC2STR_TBF)) continue;
+		SC2TXB = ch;
+		while (SC2STR & (SC2STR_TBF)) continue;
+		if (ch == 0x0a) {
+			SC2TXB = 0x0d;
+			while (SC2STR & (SC2STR_TBF)) continue;
+		}
+
+#endif
+	}
+}
+#endif
+
diff --git a/arch/mn10300/kernel/mn10300-serial-low.S b/arch/mn10300/kernel/mn10300-serial-low.S
new file mode 100644
index 000000000000..ef3f4c1df2a4
--- /dev/null
+++ b/arch/mn10300/kernel/mn10300-serial-low.S
@@ -0,0 +1,191 @@
+###############################################################################
+#
+# Virtual DMA driver for MN10300 serial ports
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/smp.h>
+#include <asm/cpu-regs.h>
+#include <asm/frame.inc>
+#include <asm/timer-regs.h>
+#include <asm/proc/cache.h>
+#include <asm/unit/timex.h>
+#include "mn10300-serial.h"
+
+#define	SCxCTR	0x00
+#define	SCxICR	0x04
+#define	SCxTXB	0x08
+#define	SCxRXB	0x09
+#define	SCxSTR	0x0c
+#define	SCxTIM	0x0d
+
+	.text
+
+###############################################################################
+#
+# serial port interrupt virtual DMA entry point
+# - intended to run at interrupt priority 1 (not affected by local_irq_disable)
+#
+###############################################################################
+	.balign	L1_CACHE_BYTES
+ENTRY(mn10300_serial_vdma_interrupt)
+	or	EPSW_IE,psw			# permit overriding by
+						# debugging interrupts
+	movm	[d2,d3,a2,a3,exreg0],(sp)
+
+	movhu	(IAGR),a2			# see if which interrupt is
+						# pending
+	and	IAGR_GN,a2
+	add	a2,a2
+	add	mn10300_serial_int_tbl,a2
+
+	mov	(a2+),a3
+	mov	(__iobase,a3),e2
+	mov	(a2),a2
+	jmp	(a2)
+
+###############################################################################
+#
+# serial port receive interrupt virtual DMA entry point
+# - intended to run at interrupt priority 1 (not affected by local_irq_disable)
+# - stores data/status byte pairs in the ring buffer
+# - induces a scheduler tick timer interrupt when done, which we then subvert
+# on entry:
+#	A3	struct mn10300_serial_port *
+#	E2	I/O port base
+#
+###############################################################################
+ENTRY(mn10300_serial_vdma_rx_handler)
+	mov	(__rx_icr,a3),e3
+	mov	GxICR_DETECT,d2
+	movbu	d2,(e3)				# ACK the interrupt
+	movhu	(e3),d2				# flush
+
+	mov	(__rx_inp,a3),d3
+	mov	d3,a2
+	add	2,d3
+	and	MNSC_BUFFER_SIZE-1,d3
+	mov	(__rx_outp,a3),d2
+	cmp	d3,d2
+	beq	mnsc_vdma_rx_overflow
+
+	mov	(__rx_buffer,a3),d2
+	add	d2,a2
+	movhu	(SCxSTR,e2),d2
+	movbu	d2,(1,a2)
+	movbu	(SCxRXB,e2),d2
+	movbu	d2,(a2)
+	mov	d3,(__rx_inp,a3)
+	bset	MNSCx_RX_AVAIL,(__intr_flags,a3)
+
+mnsc_vdma_rx_done:
+	mov	(__tm_icr,a3),a2
+	mov	GxICR_LEVEL_6|GxICR_ENABLE|GxICR_REQUEST|GxICR_DETECT,d2
+	movhu	d2,(a2)				# request a slow interrupt
+	movhu	(a2),d2				# flush
+
+	movm	(sp),[d2,d3,a2,a3,exreg0]
+	rti
+
+mnsc_vdma_rx_overflow:
+	bset	MNSCx_RX_OVERF,(__intr_flags,a3)
+	bra	mnsc_vdma_rx_done
+
+###############################################################################
+#
+# serial port transmit interrupt virtual DMA entry point
+# - intended to run at interrupt priority 1 (not affected by local_irq_disable)
+# - retrieves data bytes from the ring buffer and passes them to the serial port
+# - induces a scheduler tick timer interrupt when done, which we then subvert
+#	A3	struct mn10300_serial_port *
+#	E2	I/O port base
+#
+###############################################################################
+	.balign	L1_CACHE_BYTES
+ENTRY(mn10300_serial_vdma_tx_handler)
+	mov	(__tx_icr,a3),e3
+	mov	GxICR_DETECT,d2
+	movbu	d2,(e3)			# ACK the interrupt
+	movhu	(e3),d2			# flush
+
+	btst	0x01,(__tx_break,a3)	# handle transmit break request
+	bne	mnsc_vdma_tx_break
+
+	movbu	(SCxSTR,e2),d2		# don't try and transmit a char if the
+					# buffer is not empty
+	btst	SC01STR_TBF,d2		# (may have tried to jumpstart)
+	bne	mnsc_vdma_tx_noint
+
+	movbu	(__tx_xchar,a3),d2	# handle hi-pri XON/XOFF
+	or	d2,d2
+	bne	mnsc_vdma_tx_xchar
+
+	mov	(__tx_info_buffer,a3),a2 # get the uart_info struct for Tx
+	mov	(__xmit_tail,a2),d3
+	mov	(__xmit_head,a2),d2
+	cmp	d3,d2
+	beq	mnsc_vdma_tx_empty
+
+	mov	(__xmit_buffer,a2),d2	# get a char from the buffer and
+					# transmit it
+	movbu	(d3,d2),d2
+	movbu	d2,(SCxTXB,e2)		# Tx
+
+	inc	d3			# advance the buffer pointer
+	and	__UART_XMIT_SIZE-1,d3
+	mov	(__xmit_head,a2),d2
+	mov	d3,(__xmit_tail,a2)
+
+	sub	d3,d2			# see if we've written everything
+	beq	mnsc_vdma_tx_empty
+
+	and	__UART_XMIT_SIZE-1,d2	# see if we just made a hole
+	cmp	__UART_XMIT_SIZE-2,d2
+	beq	mnsc_vdma_tx_made_hole
+
+mnsc_vdma_tx_done:
+	mov	(__tm_icr,a3),a2
+	mov	GxICR_LEVEL_6|GxICR_ENABLE|GxICR_REQUEST|GxICR_DETECT,d2
+	movhu	d2,(a2)			# request a slow interrupt
+	movhu	(a2),d2			# flush
+
+mnsc_vdma_tx_noint:
+	movm	(sp),[d2,d3,a2,a3,exreg0]
+	rti
+
+mnsc_vdma_tx_empty:
+	mov	+(GxICR_LEVEL_1|GxICR_DETECT),d2
+	movhu	d2,(e3)			# disable the interrupt
+	movhu	(e3),d2			# flush
+
+	bset	MNSCx_TX_EMPTY,(__intr_flags,a3)
+	bra	mnsc_vdma_tx_done
+
+mnsc_vdma_tx_break:
+	movhu	(SCxCTR,e2),d2		# turn on break mode
+	or	SC01CTR_BKE,d2
+	movhu	d2,(SCxCTR,e2)
+	mov	+(GxICR_LEVEL_1|GxICR_DETECT),d2
+	movhu	d2,(e3)			# disable transmit interrupts on this
+					# channel
+	movhu	(e3),d2			# flush
+	bra	mnsc_vdma_tx_noint
+
+mnsc_vdma_tx_xchar:
+	bclr	0xff,(__tx_xchar,a3)
+	movbu	d2,(SCxTXB,e2)
+	bra	mnsc_vdma_tx_done
+
+mnsc_vdma_tx_made_hole:
+	bset	MNSCx_TX_SPACE,(__intr_flags,a3)
+	bra	mnsc_vdma_tx_done
diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
new file mode 100644
index 000000000000..b9c268c6b2fb
--- /dev/null
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -0,0 +1,1480 @@
+/* MN10300 On-chip serial port UART driver
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+static const char serial_name[] = "MN10300 Serial driver";
+static const char serial_version[] = "mn10300_serial-1.0";
+static const char serial_revdate[] = "2007-11-06";
+
+#if defined(CONFIG_MN10300_TTYSM_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/serial.h>
+#include <linux/circ_buf.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/sysrq.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/bitops.h>
+#include <asm/serial-regs.h>
+#include <asm/unit/timex.h>
+#include "mn10300-serial.h"
+
+static inline __attribute__((format(printf, 1, 2)))
+void no_printk(const char *fmt, ...)
+{
+}
+
+#define kenter(FMT, ...) \
+	printk(KERN_DEBUG "-->%s(" FMT ")\n", __func__, ##__VA_ARGS__)
+#define _enter(FMT, ...) \
+	no_printk(KERN_DEBUG "-->%s(" FMT ")\n", __func__, ##__VA_ARGS__)
+#define kdebug(FMT, ...) \
+	printk(KERN_DEBUG "--- " FMT "\n", ##__VA_ARGS__)
+#define _debug(FMT, ...) \
+	no_printk(KERN_DEBUG "--- " FMT "\n", ##__VA_ARGS__)
+#define kproto(FMT, ...) \
+	printk(KERN_DEBUG "### MNSERIAL " FMT " ###\n", ##__VA_ARGS__)
+#define _proto(FMT, ...) \
+	no_printk(KERN_DEBUG "### MNSERIAL " FMT " ###\n", ##__VA_ARGS__)
+
+#define NR_UARTS 3
+
+#ifdef CONFIG_MN10300_TTYSM_CONSOLE
+static void mn10300_serial_console_write(struct console *co,
+					   const char *s, unsigned count);
+static int __init mn10300_serial_console_setup(struct console *co,
+						 char *options);
+
+static struct uart_driver mn10300_serial_driver;
+static struct console mn10300_serial_console = {
+	.name		= "ttySM",
+	.write		= mn10300_serial_console_write,
+	.device		= uart_console_device,
+	.setup		= mn10300_serial_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= -1,
+	.data		= &mn10300_serial_driver,
+};
+#endif
+
+static struct uart_driver mn10300_serial_driver = {
+	.owner		= NULL,
+	.driver_name	= "mn10300-serial",
+	.dev_name	= "ttySM",
+	.major		= TTY_MAJOR,
+	.minor		= 128,
+	.nr		= NR_UARTS,
+#ifdef CONFIG_MN10300_TTYSM_CONSOLE
+	.cons		= &mn10300_serial_console,
+#endif
+};
+
+static unsigned int mn10300_serial_tx_empty(struct uart_port *);
+static void mn10300_serial_set_mctrl(struct uart_port *, unsigned int mctrl);
+static unsigned int mn10300_serial_get_mctrl(struct uart_port *);
+static void mn10300_serial_stop_tx(struct uart_port *);
+static void mn10300_serial_start_tx(struct uart_port *);
+static void mn10300_serial_send_xchar(struct uart_port *, char ch);
+static void mn10300_serial_stop_rx(struct uart_port *);
+static void mn10300_serial_enable_ms(struct uart_port *);
+static void mn10300_serial_break_ctl(struct uart_port *, int ctl);
+static int mn10300_serial_startup(struct uart_port *);
+static void mn10300_serial_shutdown(struct uart_port *);
+static void mn10300_serial_set_termios(struct uart_port *,
+					 struct ktermios *new,
+					 struct ktermios *old);
+static const char *mn10300_serial_type(struct uart_port *);
+static void mn10300_serial_release_port(struct uart_port *);
+static int mn10300_serial_request_port(struct uart_port *);
+static void mn10300_serial_config_port(struct uart_port *, int);
+static int mn10300_serial_verify_port(struct uart_port *,
+					struct serial_struct *);
+
+static const struct uart_ops mn10300_serial_ops = {
+	.tx_empty	= mn10300_serial_tx_empty,
+	.set_mctrl	= mn10300_serial_set_mctrl,
+	.get_mctrl	= mn10300_serial_get_mctrl,
+	.stop_tx	= mn10300_serial_stop_tx,
+	.start_tx	= mn10300_serial_start_tx,
+	.send_xchar	= mn10300_serial_send_xchar,
+	.stop_rx	= mn10300_serial_stop_rx,
+	.enable_ms	= mn10300_serial_enable_ms,
+	.break_ctl	= mn10300_serial_break_ctl,
+	.startup	= mn10300_serial_startup,
+	.shutdown	= mn10300_serial_shutdown,
+	.set_termios	= mn10300_serial_set_termios,
+	.type		= mn10300_serial_type,
+	.release_port	= mn10300_serial_release_port,
+	.request_port	= mn10300_serial_request_port,
+	.config_port	= mn10300_serial_config_port,
+	.verify_port	= mn10300_serial_verify_port,
+};
+
+static irqreturn_t mn10300_serial_interrupt(int irq, void *dev_id);
+
+/*
+ * the first on-chip serial port: ttySM0 (aka SIF0)
+ */
+#ifdef CONFIG_MN10300_TTYSM0
+struct mn10300_serial_port mn10300_serial_port_sif0 = {
+	.uart.ops	= &mn10300_serial_ops,
+	.uart.membase	= (void __iomem *) &SC0CTR,
+	.uart.mapbase	= (unsigned long) &SC0CTR,
+	.uart.iotype	= UPIO_MEM,
+	.uart.irq	= 0,
+	.uart.uartclk	= 0, /* MN10300_IOCLK, */
+	.uart.fifosize	= 1,
+	.uart.flags	= UPF_BOOT_AUTOCONF,
+	.uart.line	= 0,
+	.uart.type	= PORT_MN10300,
+	.uart.lock	=
+	__SPIN_LOCK_UNLOCKED(mn10300_serial_port_sif0.uart.lock),
+	.name		= "ttySM0",
+	._iobase	= &SC0CTR,
+	._control	= &SC0CTR,
+	._status	= (volatile u8 *) &SC0STR,
+	._intr		= &SC0ICR,
+	._rxb		= &SC0RXB,
+	._txb		= &SC0TXB,
+	.rx_name	= "ttySM0/Rx",
+	.tx_name	= "ttySM0/Tx",
+#ifdef CONFIG_MN10300_TTYSM0_TIMER8
+	.tm_name	= "ttySM0/Timer8",
+	._tmxmd		= &TM8MD,
+	._tmxbr		= &TM8BR,
+	._tmicr		= &TM8ICR,
+	.tm_irq		= TM8IRQ,
+	.div_timer	= MNSCx_DIV_TIMER_16BIT,
+#else /* CONFIG_MN10300_TTYSM0_TIMER2 */
+	.tm_name	= "ttySM0/Timer2",
+	._tmxmd		= &TM2MD,
+	._tmxbr		= (volatile u16 *) &TM2BR,
+	._tmicr		= &TM2ICR,
+	.tm_irq		= TM2IRQ,
+	.div_timer	= MNSCx_DIV_TIMER_8BIT,
+#endif
+	.rx_irq		= SC0RXIRQ,
+	.tx_irq		= SC0TXIRQ,
+	.rx_icr		= &GxICR(SC0RXIRQ),
+	.tx_icr		= &GxICR(SC0TXIRQ),
+	.clock_src	= MNSCx_CLOCK_SRC_IOCLK,
+	.options	= 0,
+#ifdef CONFIG_GDBSTUB_ON_TTYSM0
+	.gdbstub	= 1,
+#endif
+};
+#endif /* CONFIG_MN10300_TTYSM0 */
+
+/*
+ * the second on-chip serial port: ttySM1 (aka SIF1)
+ */
+#ifdef CONFIG_MN10300_TTYSM1
+struct mn10300_serial_port mn10300_serial_port_sif1 = {
+	.uart.ops	= &mn10300_serial_ops,
+	.uart.membase	= (void __iomem *) &SC1CTR,
+	.uart.mapbase	= (unsigned long) &SC1CTR,
+	.uart.iotype	= UPIO_MEM,
+	.uart.irq	= 0,
+	.uart.uartclk	= 0, /* MN10300_IOCLK, */
+	.uart.fifosize	= 1,
+	.uart.flags	= UPF_BOOT_AUTOCONF,
+	.uart.line	= 1,
+	.uart.type	= PORT_MN10300,
+	.uart.lock	=
+	__SPIN_LOCK_UNLOCKED(mn10300_serial_port_sif1.uart.lock),
+	.name		= "ttySM1",
+	._iobase	= &SC1CTR,
+	._control	= &SC1CTR,
+	._status	= (volatile u8 *) &SC1STR,
+	._intr		= &SC1ICR,
+	._rxb		= &SC1RXB,
+	._txb		= &SC1TXB,
+	.rx_name	= "ttySM1/Rx",
+	.tx_name	= "ttySM1/Tx",
+#ifdef CONFIG_MN10300_TTYSM1_TIMER9
+	.tm_name	= "ttySM1/Timer9",
+	._tmxmd		= &TM9MD,
+	._tmxbr		= &TM9BR,
+	._tmicr		= &TM9ICR,
+	.tm_irq		= TM9IRQ,
+	.div_timer	= MNSCx_DIV_TIMER_16BIT,
+#else /* CONFIG_MN10300_TTYSM1_TIMER3 */
+	.tm_name	= "ttySM1/Timer3",
+	._tmxmd		= &TM3MD,
+	._tmxbr		= (volatile u16 *) &TM3BR,
+	._tmicr		= &TM3ICR,
+	.tm_irq		= TM3IRQ,
+	.div_timer	= MNSCx_DIV_TIMER_8BIT,
+#endif
+	.rx_irq		= SC1RXIRQ,
+	.tx_irq		= SC1TXIRQ,
+	.rx_icr		= &GxICR(SC1RXIRQ),
+	.tx_icr		= &GxICR(SC1TXIRQ),
+	.clock_src	= MNSCx_CLOCK_SRC_IOCLK,
+	.options	= 0,
+#ifdef CONFIG_GDBSTUB_ON_TTYSM1
+	.gdbstub	= 1,
+#endif
+};
+#endif /* CONFIG_MN10300_TTYSM1 */
+
+/*
+ * the third on-chip serial port: ttySM2 (aka SIF2)
+ */
+#ifdef CONFIG_MN10300_TTYSM2
+struct mn10300_serial_port mn10300_serial_port_sif2 = {
+	.uart.ops	= &mn10300_serial_ops,
+	.uart.membase	= (void __iomem *) &SC2CTR,
+	.uart.mapbase	= (unsigned long) &SC2CTR,
+	.uart.iotype	= UPIO_MEM,
+	.uart.irq	= 0,
+	.uart.uartclk	= 0, /* MN10300_IOCLK, */
+	.uart.fifosize	= 1,
+	.uart.flags	= UPF_BOOT_AUTOCONF,
+	.uart.line	= 2,
+#ifdef CONFIG_MN10300_TTYSM2_CTS
+	.uart.type	= PORT_MN10300_CTS,
+#else
+	.uart.type	= PORT_MN10300,
+#endif
+	.uart.lock	=
+	__SPIN_LOCK_UNLOCKED(mn10300_serial_port_sif2.uart.lock),
+	.name		= "ttySM2",
+	.rx_name	= "ttySM2/Rx",
+	.tx_name	= "ttySM2/Tx",
+	.tm_name	= "ttySM2/Timer10",
+	._iobase	= &SC2CTR,
+	._control	= &SC2CTR,
+	._status	= &SC2STR,
+	._intr		= &SC2ICR,
+	._rxb		= &SC2RXB,
+	._txb		= &SC2TXB,
+	._tmxmd		= &TM10MD,
+	._tmxbr		= &TM10BR,
+	._tmicr		= &TM10ICR,
+	.tm_irq		= TM10IRQ,
+	.div_timer	= MNSCx_DIV_TIMER_16BIT,
+	.rx_irq		= SC2RXIRQ,
+	.tx_irq		= SC2TXIRQ,
+	.rx_icr		= &GxICR(SC2RXIRQ),
+	.tx_icr		= &GxICR(SC2TXIRQ),
+	.clock_src	= MNSCx_CLOCK_SRC_IOCLK,
+#ifdef CONFIG_MN10300_TTYSM2_CTS
+	.options	= MNSCx_OPT_CTS,
+#else
+	.options	= 0,
+#endif
+#ifdef CONFIG_GDBSTUB_ON_TTYSM2
+	.gdbstub	= 1,
+#endif
+};
+#endif /* CONFIG_MN10300_TTYSM2 */
+
+
+/*
+ * list of available serial ports
+ */
+struct mn10300_serial_port *mn10300_serial_ports[NR_UARTS + 1] = {
+#ifdef CONFIG_MN10300_TTYSM0
+	[0]	= &mn10300_serial_port_sif0,
+#endif
+#ifdef CONFIG_MN10300_TTYSM1
+	[1]	= &mn10300_serial_port_sif1,
+#endif
+#ifdef CONFIG_MN10300_TTYSM2
+	[2]	= &mn10300_serial_port_sif2,
+#endif
+	[NR_UARTS] = NULL,
+};
+
+
+/*
+ * we abuse the serial ports' baud timers' interrupt lines to get the ability
+ * to deliver interrupts to userspace as we use the ports' interrupt lines to
+ * do virtual DMA on account of the ports having no hardware FIFOs
+ *
+ * we can generate an interrupt manually in the assembly stubs by writing to
+ * the enable and detect bits in the interrupt control register, so all we need
+ * to do here is disable the interrupt line
+ *
+ * note that we can't just leave the line enabled as the baud rate timer *also*
+ * generates interrupts
+ */
+static void mn10300_serial_mask_ack(unsigned int irq)
+{
+	u16 tmp;
+	GxICR(irq) = GxICR_LEVEL_6;
+	tmp = GxICR(irq); /* flush write buffer */
+}
+
+static void mn10300_serial_nop(unsigned int irq)
+{
+}
+
+static struct irq_chip mn10300_serial_pic = {
+	.name		= "mnserial",
+	.ack		= mn10300_serial_mask_ack,
+	.mask		= mn10300_serial_mask_ack,
+	.mask_ack	= mn10300_serial_mask_ack,
+	.unmask		= mn10300_serial_nop,
+	.end		= mn10300_serial_nop,
+};
+
+
+/*
+ * serial virtual DMA interrupt jump table
+ */
+struct mn10300_serial_int mn10300_serial_int_tbl[NR_IRQS];
+
+static void mn10300_serial_dis_tx_intr(struct mn10300_serial_port *port)
+{
+	u16 x;
+	*port->tx_icr = GxICR_LEVEL_1 | GxICR_DETECT;
+	x = *port->tx_icr;
+}
+
+static void mn10300_serial_en_tx_intr(struct mn10300_serial_port *port)
+{
+	u16 x;
+	*port->tx_icr = GxICR_LEVEL_1 | GxICR_ENABLE;
+	x = *port->tx_icr;
+}
+
+static void mn10300_serial_dis_rx_intr(struct mn10300_serial_port *port)
+{
+	u16 x;
+	*port->rx_icr = GxICR_LEVEL_1 | GxICR_DETECT;
+	x = *port->rx_icr;
+}
+
+/*
+ * multi-bit equivalent of test_and_clear_bit()
+ */
+static int mask_test_and_clear(volatile u8 *ptr, u8 mask)
+{
+	u32 epsw;
+	asm volatile("	bclr	%1,(%2)		\n"
+		     "	mov	epsw,%0		\n"
+		     : "=d"(epsw) : "d"(mask), "a"(ptr));
+	return !(epsw & EPSW_FLAG_Z);
+}
+
+/*
+ * receive chars from the ring buffer for this serial port
+ * - must do break detection here (not done in the UART)
+ */
+static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
+{
+	struct uart_icount *icount = &port->uart.icount;
+	struct tty_struct *tty = port->uart.info->tty;
+	unsigned ix;
+	int count;
+	u8 st, ch, push, status, overrun;
+
+	_enter("%s", port->name);
+
+	push = 0;
+
+	count = CIRC_CNT(port->rx_inp, port->rx_outp, MNSC_BUFFER_SIZE);
+	count = tty_buffer_request_room(tty, count);
+	if (count == 0) {
+		if (!tty->low_latency)
+			tty_flip_buffer_push(tty);
+		return;
+	}
+
+try_again:
+	/* pull chars out of the hat */
+	ix = port->rx_outp;
+	if (ix == port->rx_inp) {
+		if (push && !tty->low_latency)
+			tty_flip_buffer_push(tty);
+		return;
+	}
+
+	ch = port->rx_buffer[ix++];
+	st = port->rx_buffer[ix++];
+	smp_rmb();
+	port->rx_outp = ix & (MNSC_BUFFER_SIZE - 1);
+	port->uart.icount.rx++;
+
+	st &= SC01STR_FEF | SC01STR_PEF | SC01STR_OEF;
+	status = 0;
+	overrun = 0;
+
+	/* the UART doesn't detect BREAK, so we have to do that ourselves
+	 * - it starts as a framing error on a NUL character
+	 * - then we count another two NUL characters before issuing TTY_BREAK
+	 * - then we end on a normal char or one that has all the bottom bits
+	 *   zero and the top bits set
+	 */
+	switch (port->rx_brk) {
+	case 0:
+		/* not breaking at the moment */
+		break;
+
+	case 1:
+		if (st & SC01STR_FEF && ch == 0) {
+			port->rx_brk = 2;
+			goto try_again;
+		}
+		goto not_break;
+
+	case 2:
+		if (st & SC01STR_FEF && ch == 0) {
+			port->rx_brk = 3;
+			_proto("Rx Break Detected");
+			icount->brk++;
+			if (uart_handle_break(&port->uart))
+				goto ignore_char;
+			status |= 1 << TTY_BREAK;
+			goto insert;
+		}
+		goto not_break;
+
+	default:
+		if (st & (SC01STR_FEF | SC01STR_PEF | SC01STR_OEF))
+			goto try_again; /* still breaking */
+
+		port->rx_brk = 0; /* end of the break */
+
+		switch (ch) {
+		case 0xFF:
+		case 0xFE:
+		case 0xFC:
+		case 0xF8:
+		case 0xF0:
+		case 0xE0:
+		case 0xC0:
+		case 0x80:
+		case 0x00:
+			/* discard char at probable break end */
+			goto try_again;
+		}
+		break;
+	}
+
+process_errors:
+	/* handle framing error */
+	if (st & SC01STR_FEF) {
+		if (ch == 0) {
+			/* framing error with NUL char is probably a BREAK */
+			port->rx_brk = 1;
+			goto try_again;
+		}
+
+		_proto("Rx Framing Error");
+		icount->frame++;
+		status |= 1 << TTY_FRAME;
+	}
+
+	/* handle parity error */
+	if (st & SC01STR_PEF) {
+		_proto("Rx Parity Error");
+		icount->parity++;
+		status = TTY_PARITY;
+	}
+
+	/* handle normal char */
+	if (status == 0) {
+		if (uart_handle_sysrq_char(&port->uart, ch))
+			goto ignore_char;
+		status = (1 << TTY_NORMAL);
+	}
+
+	/* handle overrun error */
+	if (st & SC01STR_OEF) {
+		if (port->rx_brk)
+			goto try_again;
+
+		_proto("Rx Overrun Error");
+		icount->overrun++;
+		overrun = 1;
+	}
+
+insert:
+	status &= port->uart.read_status_mask;
+
+	if (!overrun && !(status & port->uart.ignore_status_mask)) {
+		int flag;
+
+		if (status & (1 << TTY_BREAK))
+			flag = TTY_BREAK;
+		else if (status & (1 << TTY_PARITY))
+			flag = TTY_PARITY;
+		else if (status & (1 << TTY_FRAME))
+			flag = TTY_FRAME;
+		else
+			flag = TTY_NORMAL;
+
+		tty_insert_flip_char(tty, ch, flag);
+	}
+
+	/* overrun is special, since it's reported immediately, and doesn't
+	 * affect the current character
+	 */
+	if (overrun)
+		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+
+	count--;
+	if (count <= 0) {
+		if (!tty->low_latency)
+			tty_flip_buffer_push(tty);
+		return;
+	}
+
+ignore_char:
+	push = 1;
+	goto try_again;
+
+not_break:
+	port->rx_brk = 0;
+	goto process_errors;
+}
+
+/*
+ * handle an interrupt from the serial transmission "virtual DMA" driver
+ * - note: the interrupt routine will disable its own interrupts when the Tx
+ *   buffer is empty
+ */
+static void mn10300_serial_transmit_interrupt(struct mn10300_serial_port *port)
+{
+	_enter("%s", port->name);
+
+	if (uart_tx_stopped(&port->uart) ||
+	    uart_circ_empty(&port->uart.info->xmit))
+		mn10300_serial_dis_tx_intr(port);
+
+	if (uart_circ_chars_pending(&port->uart.info->xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&port->uart);
+}
+
+/*
+ * deal with a change in the status of the CTS line
+ */
+static void mn10300_serial_cts_changed(struct mn10300_serial_port *port, u8 st)
+{
+	u16 ctr;
+
+	port->tx_cts = st;
+	port->uart.icount.cts++;
+
+	/* flip the CTS state selector flag to interrupt when it changes
+	 * back */
+	ctr = *port->_control;
+	ctr ^= SC2CTR_TWS;
+	*port->_control = ctr;
+
+	uart_handle_cts_change(&port->uart, st & SC2STR_CTS);
+	wake_up_interruptible(&port->uart.info->delta_msr_wait);
+}
+
+/*
+ * handle a virtual interrupt generated by the lower level "virtual DMA"
+ * routines (irq is the baud timer interrupt)
+ */
+static irqreturn_t mn10300_serial_interrupt(int irq, void *dev_id)
+{
+	struct mn10300_serial_port *port = dev_id;
+	u8 st;
+
+	spin_lock(&port->uart.lock);
+
+	if (port->intr_flags) {
+		_debug("INT %s: %x", port->name, port->intr_flags);
+
+		if (mask_test_and_clear(&port->intr_flags, MNSCx_RX_AVAIL))
+			mn10300_serial_receive_interrupt(port);
+
+		if (mask_test_and_clear(&port->intr_flags,
+					MNSCx_TX_SPACE | MNSCx_TX_EMPTY))
+			mn10300_serial_transmit_interrupt(port);
+	}
+
+	/* the only modem control line amongst the whole lot is CTS on
+	 * serial port 2 */
+	if (port->type == PORT_MN10300_CTS) {
+		st = *port->_status;
+		if ((port->tx_cts ^ st) & SC2STR_CTS)
+			mn10300_serial_cts_changed(port, st);
+	}
+
+	spin_unlock(&port->uart.lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * return indication of whether the hardware transmit buffer is empty
+ */
+static unsigned int mn10300_serial_tx_empty(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s", port->name);
+
+	return (*port->_status & (SC01STR_TXF | SC01STR_TBF)) ?
+		0 : TIOCSER_TEMT;
+}
+
+/*
+ * set the modem control lines (we don't have any)
+ */
+static void mn10300_serial_set_mctrl(struct uart_port *_port,
+				     unsigned int mctrl)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s,%x", port->name, mctrl);
+}
+
+/*
+ * get the modem control line statuses
+ */
+static unsigned int mn10300_serial_get_mctrl(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s", port->name);
+
+	if (port->type == PORT_MN10300_CTS && !(*port->_status & SC2STR_CTS))
+		return TIOCM_CAR | TIOCM_DSR;
+
+	return TIOCM_CAR | TIOCM_CTS | TIOCM_DSR;
+}
+
+/*
+ * stop transmitting characters
+ */
+static void mn10300_serial_stop_tx(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s", port->name);
+
+	/* disable the virtual DMA */
+	mn10300_serial_dis_tx_intr(port);
+}
+
+/*
+ * start transmitting characters
+ * - jump-start transmission if it has stalled
+ *   - enable the serial Tx interrupt (used by the virtual DMA controller)
+ *   - force an interrupt to happen if necessary
+ */
+static void mn10300_serial_start_tx(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	u16 x;
+
+	_enter("%s{%lu}",
+	       port->name,
+	       CIRC_CNT(&port->uart.info->xmit.head,
+			&port->uart.info->xmit.tail,
+			UART_XMIT_SIZE));
+
+	/* kick the virtual DMA controller */
+	x = *port->tx_icr;
+	x |= GxICR_ENABLE;
+
+	if (*port->_status & SC01STR_TBF)
+		x &= ~(GxICR_REQUEST | GxICR_DETECT);
+	else
+		x |= GxICR_REQUEST | GxICR_DETECT;
+
+	_debug("CTR=%04hx ICR=%02hx STR=%04x TMD=%02hx TBR=%04hx ICR=%04hx",
+	       *port->_control, *port->_intr, *port->_status,
+	       *port->_tmxmd, *port->_tmxbr, *port->tx_icr);
+
+	*port->tx_icr = x;
+	x = *port->tx_icr;
+}
+
+/*
+ * transmit a high-priority XON/XOFF character
+ */
+static void mn10300_serial_send_xchar(struct uart_port *_port, char ch)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s,%02x", port->name, ch);
+
+	if (likely(port->gdbstub)) {
+		port->tx_xchar = ch;
+		if (ch)
+			mn10300_serial_en_tx_intr(port);
+	}
+}
+
+/*
+ * stop receiving characters
+ * - called whilst the port is being closed
+ */
+static void mn10300_serial_stop_rx(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	u16 ctr;
+
+	_enter("%s", port->name);
+
+	ctr = *port->_control;
+	ctr &= ~SC01CTR_RXE;
+	*port->_control = ctr;
+
+	mn10300_serial_dis_rx_intr(port);
+}
+
+/*
+ * enable modem status interrupts
+ */
+static void mn10300_serial_enable_ms(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	u16 ctr, cts;
+
+	_enter("%s", port->name);
+
+	if (port->type == PORT_MN10300_CTS) {
+		/* want to interrupt when CTS goes low if CTS is now high and
+		 * vice versa
+		 */
+		port->tx_cts = *port->_status;
+
+		cts = (port->tx_cts & SC2STR_CTS) ?
+			SC2CTR_TWE : SC2CTR_TWE | SC2CTR_TWS;
+
+		ctr = *port->_control;
+		ctr &= ~SC2CTR_TWS;
+		ctr |= cts;
+		*port->_control = ctr;
+
+		mn10300_serial_en_tx_intr(port);
+	}
+}
+
+/*
+ * transmit or cease transmitting a break signal
+ */
+static void mn10300_serial_break_ctl(struct uart_port *_port, int ctl)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s,%d", port->name, ctl);
+
+	if (ctl) {
+		/* tell the virtual DMA handler to assert BREAK */
+		port->tx_break = 1;
+		mn10300_serial_en_tx_intr(port);
+	} else {
+		port->tx_break = 0;
+		*port->_control &= ~SC01CTR_BKE;
+		mn10300_serial_en_tx_intr(port);
+	}
+}
+
+/*
+ * grab the interrupts and enable the port for reception
+ */
+static int mn10300_serial_startup(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+	struct mn10300_serial_int *pint;
+
+	_enter("%s{%d}", port->name, port->gdbstub);
+
+	if (unlikely(port->gdbstub))
+		return -EBUSY;
+
+	/* allocate an Rx buffer for the virtual DMA handler */
+	port->rx_buffer = kmalloc(MNSC_BUFFER_SIZE, GFP_KERNEL);
+	if (!port->rx_buffer)
+		return -ENOMEM;
+
+	port->rx_inp = port->rx_outp = 0;
+
+	/* finally, enable the device */
+	*port->_intr = SC01ICR_TI;
+	*port->_control |= SC01CTR_TXE | SC01CTR_RXE;
+
+	pint = &mn10300_serial_int_tbl[port->rx_irq];
+	pint->port = port;
+	pint->vdma = mn10300_serial_vdma_rx_handler;
+	pint = &mn10300_serial_int_tbl[port->tx_irq];
+	pint->port = port;
+	pint->vdma = mn10300_serial_vdma_tx_handler;
+
+	set_intr_level(port->rx_irq, GxICR_LEVEL_1);
+	set_intr_level(port->tx_irq, GxICR_LEVEL_1);
+	set_irq_chip(port->tm_irq, &mn10300_serial_pic);
+
+	if (request_irq(port->rx_irq, mn10300_serial_interrupt,
+			IRQF_DISABLED, port->rx_name, port) < 0)
+		goto error;
+
+	if (request_irq(port->tx_irq, mn10300_serial_interrupt,
+			IRQF_DISABLED, port->tx_name, port) < 0)
+		goto error2;
+
+	if (request_irq(port->tm_irq, mn10300_serial_interrupt,
+			IRQF_DISABLED, port->tm_name, port) < 0)
+		goto error3;
+	mn10300_serial_mask_ack(port->tm_irq);
+
+	return 0;
+
+error3:
+	free_irq(port->tx_irq, port);
+error2:
+	free_irq(port->rx_irq, port);
+error:
+	kfree(port->rx_buffer);
+	port->rx_buffer = NULL;
+	return -EBUSY;
+}
+
+/*
+ * shutdown the port and release interrupts
+ */
+static void mn10300_serial_shutdown(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s", port->name);
+
+	/* disable the serial port and its baud rate timer */
+	port->tx_break = 0;
+	*port->_control &= ~(SC01CTR_TXE | SC01CTR_RXE | SC01CTR_BKE);
+	*port->_tmxmd = 0;
+
+	if (port->rx_buffer) {
+		void *buf = port->rx_buffer;
+		port->rx_buffer = NULL;
+		kfree(buf);
+	}
+
+	/* disable all intrs */
+	free_irq(port->tm_irq, port);
+	free_irq(port->rx_irq, port);
+	free_irq(port->tx_irq, port);
+
+	*port->rx_icr = GxICR_LEVEL_1;
+	*port->tx_icr = GxICR_LEVEL_1;
+}
+
+/*
+ * this routine is called to set the UART divisor registers to match the
+ * specified baud rate for a serial port.
+ */
+static void mn10300_serial_change_speed(struct mn10300_serial_port *port,
+					  struct ktermios *new,
+					  struct ktermios *old)
+{
+	unsigned long flags;
+	unsigned long ioclk = port->ioclk;
+	unsigned cflag;
+	int baud, bits, xdiv, tmp;
+	u16 tmxbr, scxctr;
+	u8 tmxmd, battempt;
+	u8 div_timer = port->div_timer;
+
+	_enter("%s{%lu}", port->name, ioclk);
+
+	/* byte size and parity */
+	cflag = new->c_cflag;
+	switch (cflag & CSIZE) {
+	case CS7: scxctr = SC01CTR_CLN_7BIT; bits = 9;  break;
+	case CS8: scxctr = SC01CTR_CLN_8BIT; bits = 10; break;
+	default:  scxctr = SC01CTR_CLN_8BIT; bits = 10; break;
+	}
+
+	if (cflag & CSTOPB) {
+		scxctr |= SC01CTR_STB_2BIT;
+		bits++;
+	}
+
+	if (cflag & PARENB) {
+		bits++;
+		if (cflag & PARODD)
+			scxctr |= SC01CTR_PB_ODD;
+#ifdef CMSPAR
+		else if (cflag & CMSPAR)
+			scxctr |= SC01CTR_PB_FIXED0;
+#endif
+		else
+			scxctr |= SC01CTR_PB_EVEN;
+	}
+
+	/* Determine divisor based on baud rate */
+	battempt = 0;
+
+	if (div_timer == MNSCx_DIV_TIMER_16BIT)
+		scxctr |= SC0CTR_CK_TM8UFLOW_8; /* ( == SC1CTR_CK_TM9UFLOW_8
+						 *   == SC2CTR_CK_TM10UFLOW) */
+	else if (div_timer == MNSCx_DIV_TIMER_8BIT)
+		scxctr |= SC0CTR_CK_TM2UFLOW_8;
+
+try_alternative:
+	baud = uart_get_baud_rate(&port->uart, new, old, 0,
+				  port->ioclk / 8);
+
+	_debug("ALT %d [baud %d]", battempt, baud);
+
+	if (!baud)
+		baud = 9600;	/* B0 transition handled in rs_set_termios */
+	xdiv = 1;
+	if (baud == 134) {
+		baud = 269;	/* 134 is really 134.5 */
+		xdiv = 2;
+	}
+
+	if (baud == 38400 &&
+	    (port->uart.flags & UPF_SPD_MASK) == UPF_SPD_CUST
+	    ) {
+		_debug("CUSTOM %u", port->uart.custom_divisor);
+
+		if (div_timer == MNSCx_DIV_TIMER_16BIT) {
+			if (port->uart.custom_divisor <= 65535) {
+				tmxmd = TM8MD_SRC_IOCLK;
+				tmxbr = port->uart.custom_divisor;
+				port->uart.uartclk = ioclk;
+				goto timer_okay;
+			}
+			if (port->uart.custom_divisor / 8 <= 65535) {
+				tmxmd = TM8MD_SRC_IOCLK_8;
+				tmxbr = port->uart.custom_divisor / 8;
+				port->uart.custom_divisor = tmxbr * 8;
+				port->uart.uartclk = ioclk / 8;
+				goto timer_okay;
+			}
+			if (port->uart.custom_divisor / 32 <= 65535) {
+				tmxmd = TM8MD_SRC_IOCLK_32;
+				tmxbr = port->uart.custom_divisor / 32;
+				port->uart.custom_divisor = tmxbr * 32;
+				port->uart.uartclk = ioclk / 32;
+				goto timer_okay;
+			}
+
+		} else if (div_timer == MNSCx_DIV_TIMER_8BIT) {
+			if (port->uart.custom_divisor <= 255) {
+				tmxmd = TM2MD_SRC_IOCLK;
+				tmxbr = port->uart.custom_divisor;
+				port->uart.uartclk = ioclk;
+				goto timer_okay;
+			}
+			if (port->uart.custom_divisor / 8 <= 255) {
+				tmxmd = TM2MD_SRC_IOCLK_8;
+				tmxbr = port->uart.custom_divisor / 8;
+				port->uart.custom_divisor = tmxbr * 8;
+				port->uart.uartclk = ioclk / 8;
+				goto timer_okay;
+			}
+			if (port->uart.custom_divisor / 32 <= 255) {
+				tmxmd = TM2MD_SRC_IOCLK_32;
+				tmxbr = port->uart.custom_divisor / 32;
+				port->uart.custom_divisor = tmxbr * 32;
+				port->uart.uartclk = ioclk / 32;
+				goto timer_okay;
+			}
+		}
+	}
+
+	switch (div_timer) {
+	case MNSCx_DIV_TIMER_16BIT:
+		port->uart.uartclk = ioclk;
+		tmxmd = TM8MD_SRC_IOCLK;
+		tmxbr = tmp = (ioclk / (baud * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 65535)
+			goto timer_okay;
+
+		port->uart.uartclk = ioclk / 8;
+		tmxmd = TM8MD_SRC_IOCLK_8;
+		tmxbr = tmp = (ioclk / (baud * 8 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 65535)
+			goto timer_okay;
+
+		port->uart.uartclk = ioclk / 32;
+		tmxmd = TM8MD_SRC_IOCLK_32;
+		tmxbr = tmp = (ioclk / (baud * 32 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 65535)
+			goto timer_okay;
+		break;
+
+	case MNSCx_DIV_TIMER_8BIT:
+		port->uart.uartclk = ioclk;
+		tmxmd = TM2MD_SRC_IOCLK;
+		tmxbr = tmp = (ioclk / (baud * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 255)
+			goto timer_okay;
+
+		port->uart.uartclk = ioclk / 8;
+		tmxmd = TM2MD_SRC_IOCLK_8;
+		tmxbr = tmp = (ioclk / (baud * 8 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 255)
+			goto timer_okay;
+
+		port->uart.uartclk = ioclk / 32;
+		tmxmd = TM2MD_SRC_IOCLK_32;
+		tmxbr = tmp = (ioclk / (baud * 32 * xdiv) + 4) / 8 - 1;
+		if (tmp > 0 && tmp <= 255)
+			goto timer_okay;
+		break;
+
+	default:
+		BUG();
+		return;
+	}
+
+	/* refuse to change to a baud rate we can't support */
+	_debug("CAN'T SUPPORT");
+
+	switch (battempt) {
+	case 0:
+		if (old) {
+			new->c_cflag &= ~CBAUD;
+			new->c_cflag |= (old->c_cflag & CBAUD);
+			battempt = 1;
+			goto try_alternative;
+		}
+
+	case 1:
+		/* as a last resort, if the quotient is zero, default to 9600
+		 * bps */
+		new->c_cflag &= ~CBAUD;
+		new->c_cflag |= B9600;
+		battempt = 2;
+		goto try_alternative;
+
+	default:
+		/* hmmm... can't seem to support 9600 either
+		 * - we could try iterating through the speeds we know about to
+		 *   find the lowest
+		 */
+		new->c_cflag &= ~CBAUD;
+		new->c_cflag |= B0;
+
+		if (div_timer == MNSCx_DIV_TIMER_16BIT)
+			tmxmd = TM8MD_SRC_IOCLK_32;
+		else if (div_timer == MNSCx_DIV_TIMER_8BIT)
+			tmxmd = TM2MD_SRC_IOCLK_32;
+		tmxbr = 1;
+
+		port->uart.uartclk = ioclk / 32;
+		break;
+	}
+timer_okay:
+
+	_debug("UARTCLK: %u / %hu", port->uart.uartclk, tmxbr);
+
+	/* make the changes */
+	spin_lock_irqsave(&port->uart.lock, flags);
+
+	uart_update_timeout(&port->uart, new->c_cflag, baud);
+
+	/* set the timer to produce the required baud rate */
+	switch (div_timer) {
+	case MNSCx_DIV_TIMER_16BIT:
+		*port->_tmxmd = 0;
+		*port->_tmxbr = tmxbr;
+		*port->_tmxmd = TM8MD_INIT_COUNTER;
+		*port->_tmxmd = tmxmd | TM8MD_COUNT_ENABLE;
+		break;
+
+	case MNSCx_DIV_TIMER_8BIT:
+		*port->_tmxmd = 0;
+		*(volatile u8 *) port->_tmxbr = (u8) tmxbr;
+		*port->_tmxmd = TM2MD_INIT_COUNTER;
+		*port->_tmxmd = tmxmd | TM2MD_COUNT_ENABLE;
+		break;
+	}
+
+	/* CTS flow control flag and modem status interrupts */
+	scxctr &= ~(SC2CTR_TWE | SC2CTR_TWS);
+
+	if (port->type == PORT_MN10300_CTS && cflag & CRTSCTS) {
+		/* want to interrupt when CTS goes low if CTS is now
+		 * high and vice versa
+		 */
+		port->tx_cts = *port->_status;
+
+		if (port->tx_cts & SC2STR_CTS)
+			scxctr |= SC2CTR_TWE;
+		else
+			scxctr |= SC2CTR_TWE | SC2CTR_TWS;
+	}
+
+	/* set up parity check flag */
+	port->uart.read_status_mask = (1 << TTY_NORMAL) | (1 << TTY_OVERRUN);
+	if (new->c_iflag & INPCK)
+		port->uart.read_status_mask |=
+			(1 << TTY_PARITY) | (1 << TTY_FRAME);
+	if (new->c_iflag & (BRKINT | PARMRK))
+		port->uart.read_status_mask |= (1 << TTY_BREAK);
+
+	/* characters to ignore */
+	port->uart.ignore_status_mask = 0;
+	if (new->c_iflag & IGNPAR)
+		port->uart.ignore_status_mask |=
+			(1 << TTY_PARITY) | (1 << TTY_FRAME);
+	if (new->c_iflag & IGNBRK) {
+		port->uart.ignore_status_mask |= (1 << TTY_BREAK);
+		/*
+		 * If we're ignoring parity and break indicators,
+		 * ignore overruns to (for real raw support).
+		 */
+		if (new->c_iflag & IGNPAR)
+			port->uart.ignore_status_mask |= (1 << TTY_OVERRUN);
+	}
+
+	/* Ignore all characters if CREAD is not set */
+	if ((new->c_cflag & CREAD) == 0)
+		port->uart.ignore_status_mask |= (1 << TTY_NORMAL);
+
+	scxctr |= *port->_control & (SC01CTR_TXE | SC01CTR_RXE | SC01CTR_BKE);
+	*port->_control = scxctr;
+
+	spin_unlock_irqrestore(&port->uart.lock, flags);
+}
+
+/*
+ * set the terminal I/O parameters
+ */
+static void mn10300_serial_set_termios(struct uart_port *_port,
+					 struct ktermios *new,
+					 struct ktermios *old)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s,%p,%p", port->name, new, old);
+
+	mn10300_serial_change_speed(port, new, old);
+
+	/* handle turning off CRTSCTS */
+	if (!(new->c_cflag & CRTSCTS)) {
+		u16 ctr = *port->_control;
+		ctr &= ~SC2CTR_TWE;
+		*port->_control = ctr;
+	}
+}
+
+/*
+ * return description of port type
+ */
+static const char *mn10300_serial_type(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	if (port->uart.type == PORT_MN10300_CTS)
+		return "MN10300 SIF_CTS";
+
+	return "MN10300 SIF";
+}
+
+/*
+ * release I/O and memory regions in use by port
+ */
+static void mn10300_serial_release_port(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s", port->name);
+
+	release_mem_region((unsigned long) port->_iobase, 16);
+}
+
+/*
+ * request I/O and memory regions for port
+ */
+static int mn10300_serial_request_port(struct uart_port *_port)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s", port->name);
+
+	request_mem_region((unsigned long) port->_iobase, 16, port->name);
+	return 0;
+}
+
+/*
+ * configure the type and reserve the ports
+ */
+static void mn10300_serial_config_port(struct uart_port *_port, int type)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+
+	_enter("%s", port->name);
+
+	port->uart.type = PORT_MN10300;
+
+	if (port->options & MNSCx_OPT_CTS)
+		port->uart.type = PORT_MN10300_CTS;
+
+	mn10300_serial_request_port(_port);
+}
+
+/*
+ * verify serial parameters are suitable for this port type
+ */
+static int mn10300_serial_verify_port(struct uart_port *_port,
+					struct serial_struct *ss)
+{
+	struct mn10300_serial_port *port =
+		container_of(_port, struct mn10300_serial_port, uart);
+	void *mapbase = (void *) (unsigned long) port->uart.mapbase;
+
+	_enter("%s", port->name);
+
+	/* these things may not be changed */
+	if (ss->irq		!= port->uart.irq	||
+	    ss->port		!= port->uart.iobase	||
+	    ss->io_type		!= port->uart.iotype	||
+	    ss->iomem_base	!= mapbase ||
+	    ss->iomem_reg_shift	!= port->uart.regshift	||
+	    ss->hub6		!= port->uart.hub6	||
+	    ss->xmit_fifo_size	!= port->uart.fifosize)
+		return -EINVAL;
+
+	/* type may be changed on a port that supports CTS */
+	if (ss->type != port->uart.type) {
+		if (!(port->options & MNSCx_OPT_CTS))
+			return -EINVAL;
+
+		if (ss->type != PORT_MN10300 &&
+		    ss->type != PORT_MN10300_CTS)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * initialise the MN10300 on-chip UARTs
+ */
+static int __init mn10300_serial_init(void)
+{
+	struct mn10300_serial_port *port;
+	int ret, i;
+
+	printk(KERN_INFO "%s version %s (%s)\n",
+	       serial_name, serial_version, serial_revdate);
+
+#ifdef CONFIG_MN10300_TTYSM2
+	SC2TIM = 8; /* make the baud base of timer 2 IOCLK/8 */
+#endif
+
+	set_intr_stub(EXCEP_IRQ_LEVEL1, mn10300_serial_vdma_interrupt);
+
+	ret = uart_register_driver(&mn10300_serial_driver);
+	if (!ret) {
+		for (i = 0 ; i < NR_PORTS ; i++) {
+			port = mn10300_serial_ports[i];
+			if (!port || port->gdbstub)
+				continue;
+
+			switch (port->clock_src) {
+			case MNSCx_CLOCK_SRC_IOCLK:
+				port->ioclk = MN10300_IOCLK;
+				break;
+
+#ifdef MN10300_IOBCLK
+			case MNSCx_CLOCK_SRC_IOBCLK:
+				port->ioclk = MN10300_IOBCLK;
+				break;
+#endif
+			default:
+				BUG();
+			}
+
+			ret = uart_add_one_port(&mn10300_serial_driver,
+						&port->uart);
+
+			if (ret < 0) {
+				_debug("ERROR %d", -ret);
+				break;
+			}
+		}
+
+		if (ret)
+			uart_unregister_driver(&mn10300_serial_driver);
+	}
+
+	return ret;
+}
+
+__initcall(mn10300_serial_init);
+
+
+#ifdef CONFIG_MN10300_TTYSM_CONSOLE
+
+/*
+ * print a string to the serial port without disturbing the real user of the
+ * port too much
+ * - the console must be locked by the caller
+ */
+static void mn10300_serial_console_write(struct console *co,
+					   const char *s, unsigned count)
+{
+	struct mn10300_serial_port *port;
+	unsigned i;
+	u16 scxctr, txicr, tmp;
+	u8 tmxmd;
+
+	port = mn10300_serial_ports[co->index];
+
+	/* firstly hijack the serial port from the "virtual DMA" controller */
+	txicr = *port->tx_icr;
+	*port->tx_icr = GxICR_LEVEL_1;
+	tmp = *port->tx_icr;
+
+	/* the transmitter may be disabled */
+	scxctr = *port->_control;
+	if (!(scxctr & SC01CTR_TXE)) {
+		/* restart the UART clock */
+		tmxmd = *port->_tmxmd;
+
+		switch (port->div_timer) {
+		case MNSCx_DIV_TIMER_16BIT:
+			*port->_tmxmd = 0;
+			*port->_tmxmd = TM8MD_INIT_COUNTER;
+			*port->_tmxmd = tmxmd | TM8MD_COUNT_ENABLE;
+			break;
+
+		case MNSCx_DIV_TIMER_8BIT:
+			*port->_tmxmd = 0;
+			*port->_tmxmd = TM2MD_INIT_COUNTER;
+			*port->_tmxmd = tmxmd | TM2MD_COUNT_ENABLE;
+			break;
+		}
+
+		/* enable the transmitter */
+		*port->_control = (scxctr & ~SC01CTR_BKE) | SC01CTR_TXE;
+
+	} else if (scxctr & SC01CTR_BKE) {
+		/* stop transmitting BREAK */
+		*port->_control = (scxctr & ~SC01CTR_BKE);
+	}
+
+	/* send the chars into the serial port (with LF -> LFCR conversion) */
+	for (i = 0; i < count; i++) {
+		char ch = *s++;
+
+		while (*port->_status & SC01STR_TBF)
+			continue;
+		*(u8 *) port->_txb = ch;
+
+		if (ch == 0x0a) {
+			while (*port->_status & SC01STR_TBF)
+				continue;
+			*(u8 *) port->_txb = 0xd;
+		}
+	}
+
+	/* can't let the transmitter be turned off if it's actually
+	 * transmitting */
+	while (*port->_status & (SC01STR_TXF | SC01STR_TBF))
+		continue;
+
+	/* disable the transmitter if we re-enabled it */
+	if (!(scxctr & SC01CTR_TXE))
+		*port->_control = scxctr;
+
+	*port->tx_icr = txicr;
+	tmp = *port->tx_icr;
+}
+
+/*
+ * set up a serial port as a console
+ * - construct a cflag setting for the first rs_open()
+ * - initialize the serial port
+ * - return non-zero if we didn't find a serial port.
+ */
+static int __init mn10300_serial_console_setup(struct console *co,
+						 char *options)
+{
+	struct mn10300_serial_port *port;
+	int i, parity = 'n', baud = 9600, bits = 8, flow = 0;
+
+	for (i = 0 ; i < NR_PORTS ; i++) {
+		port = mn10300_serial_ports[i];
+		if (port && !port->gdbstub && port->uart.line == co->index)
+			goto found_device;
+	}
+
+	return -ENODEV;
+
+found_device:
+	switch (port->clock_src) {
+	case MNSCx_CLOCK_SRC_IOCLK:
+		port->ioclk = MN10300_IOCLK;
+		break;
+
+#ifdef MN10300_IOBCLK
+	case MNSCx_CLOCK_SRC_IOBCLK:
+		port->ioclk = MN10300_IOBCLK;
+		break;
+#endif
+	default:
+		BUG();
+	}
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(&port->uart, co, baud, parity, bits, flow);
+}
+
+/*
+ * register console
+ */
+static int __init mn10300_serial_console_init(void)
+{
+	register_console(&mn10300_serial_console);
+	return 0;
+}
+
+console_initcall(mn10300_serial_console_init);
+#endif
diff --git a/arch/mn10300/kernel/mn10300-serial.h b/arch/mn10300/kernel/mn10300-serial.h
new file mode 100644
index 000000000000..6796499bf789
--- /dev/null
+++ b/arch/mn10300/kernel/mn10300-serial.h
@@ -0,0 +1,126 @@
+/* MN10300 On-chip serial port driver definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _MN10300_SERIAL_H
+#define _MN10300_SERIAL_H
+
+#ifndef __ASSEMBLY__
+#include <linux/serial_core.h>
+#include <linux/termios.h>
+#endif
+
+#include <asm/page.h>
+#include <asm/serial-regs.h>
+
+#define NR_PORTS		3		/* should be set 3 or 9 or 16 */
+
+#define MNSC_BUFFER_SIZE	+(PAGE_SIZE / 2)
+
+/* intr_flags bits */
+#define MNSCx_RX_AVAIL		0x01
+#define MNSCx_RX_OVERF		0x02
+#define MNSCx_TX_SPACE		0x04
+#define MNSCx_TX_EMPTY		0x08
+
+#ifndef __ASSEMBLY__
+
+struct mn10300_serial_port {
+	char			*rx_buffer;	/* reception buffer base */
+	unsigned		rx_inp;		/* pointer to rx input offset */
+	unsigned		rx_outp;	/* pointer to rx output offset */
+	u8			tx_xchar;	/* high-priority XON/XOFF buffer */
+	u8			tx_break;	/* transmit break request */
+	u8			intr_flags;	/* interrupt flags */
+	volatile u16		*rx_icr;	/* Rx interrupt control register */
+	volatile u16		*tx_icr;	/* Tx interrupt control register */
+	int			rx_irq;		/* reception IRQ */
+	int			tx_irq;		/* transmission IRQ */
+	int			tm_irq;		/* timer IRQ */
+
+	const char		*name;		/* name of serial port */
+	const char		*rx_name;	/* Rx interrupt handler name of serial port */
+	const char		*tx_name;	/* Tx interrupt handler name of serial port */
+	const char		*tm_name;	/* Timer interrupt handler name */
+	unsigned short		type;		/* type of serial port */
+	unsigned char		isconsole;	/* T if it's a console */
+	volatile void		*_iobase;	/* pointer to base of I/O control regs */
+	volatile u16		*_control;	/* control register pointer */
+	volatile u8		*_status;	/* status register pointer */
+	volatile u8		*_intr;		/* interrupt register pointer */
+	volatile void		*_rxb;		/* receive buffer register pointer */
+	volatile void		*_txb;		/* transmit buffer register pointer */
+	volatile u16		*_tmicr;	/* timer interrupt control register */
+	volatile u8		*_tmxmd;	/* baud rate timer mode register */
+	volatile u16		*_tmxbr;	/* baud rate timer base register */
+
+	/* this must come down here so that assembly can use BSET to access the
+	 * above fields */
+	struct uart_port	uart;
+
+	unsigned short		rx_brk;		/* current break reception status */
+	u16			tx_cts;		/* current CTS status */
+	int			gdbstub;	/* preemptively stolen by GDB stub */
+
+	u8			clock_src;	/* clock source */
+#define MNSCx_CLOCK_SRC_IOCLK	0
+#define MNSCx_CLOCK_SRC_IOBCLK	1
+
+	u8			div_timer;	/* timer used as divisor */
+#define MNSCx_DIV_TIMER_16BIT	0
+#define MNSCx_DIV_TIMER_8BIT	1
+
+	u16			options;	/* options */
+#define MNSCx_OPT_CTS		0x0001
+
+	unsigned long		ioclk;		/* base clock rate */
+};
+
+#ifdef CONFIG_MN10300_TTYSM0
+extern struct mn10300_serial_port mn10300_serial_port_sif0;
+#endif
+
+#ifdef CONFIG_MN10300_TTYSM1
+extern struct mn10300_serial_port mn10300_serial_port_sif1;
+#endif
+
+#ifdef CONFIG_MN10300_TTYSM2
+extern struct mn10300_serial_port mn10300_serial_port_sif2;
+#endif
+
+extern struct mn10300_serial_port *mn10300_serial_ports[];
+
+struct mn10300_serial_int {
+	struct mn10300_serial_port *port;
+	asmlinkage void (*vdma)(void);
+};
+
+extern struct mn10300_serial_int mn10300_serial_int_tbl[];
+
+extern asmlinkage void mn10300_serial_vdma_interrupt(void);
+extern asmlinkage void mn10300_serial_vdma_rx_handler(void);
+extern asmlinkage void mn10300_serial_vdma_tx_handler(void);
+
+#endif /* __ASSEMBLY__ */
+
+#if defined(CONFIG_GDBSTUB_ON_TTYSM0)
+#define SCgSTR SC0STR
+#define SCgRXB SC0RXB
+#define SCgRXIRQ SC0RXIRQ
+#elif defined(CONFIG_GDBSTUB_ON_TTYSM1)
+#define SCgSTR SC1STR
+#define SCgRXB SC1RXB
+#define SCgRXIRQ SC1RXIRQ
+#elif defined(CONFIG_GDBSTUB_ON_TTYSM2)
+#define SCgSTR SC2STR
+#define SCgRXB SC2RXB
+#define SCgRXIRQ SC2RXIRQ
+#endif
+
+#endif /* _MN10300_SERIAL_H */
diff --git a/arch/mn10300/kernel/mn10300-watchdog-low.S b/arch/mn10300/kernel/mn10300-watchdog-low.S
new file mode 100644
index 000000000000..996244745cca
--- /dev/null
+++ b/arch/mn10300/kernel/mn10300-watchdog-low.S
@@ -0,0 +1,59 @@
+###############################################################################
+#
+# MN10300 Watchdog interrupt handler
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/intctl-regs.h>
+#include <asm/timer-regs.h>
+#include <asm/frame.inc>
+
+	.text
+
+###############################################################################
+#
+# Watchdog handler entry point
+# - special non-maskable interrupt
+#
+###############################################################################
+	.globl	watchdog_handler
+	.type	watchdog_handler,@function
+watchdog_handler:
+	add	-4,sp
+	SAVE_ALL
+
+	mov	0xffffffff,d0
+	mov	d0,(REG_ORIG_D0,fp)
+
+	mov	fp,d0
+	lsr	2,d1
+	call	watchdog_interrupt[],0		# watchdog_interrupt(regs,irq)
+
+	jmp	ret_from_intr
+
+	.size	watchdog_handler,.-watchdog_handler
+
+###############################################################################
+#
+# Watchdog touch entry point
+# - kept to absolute minimum (unfortunately, it's prototyped in linux/nmi.h so
+#   we can't inline it)
+#
+###############################################################################
+	.globl	touch_nmi_watchdog
+	.type	touch_nmi_watchdog,@function
+touch_nmi_watchdog:
+	clr	d0
+	mov	d0,(watchdog_alert_counter)
+	ret	[],0
+
+	.size	touch_nmi_watchdog,.-touch_nmi_watchdog
diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c
new file mode 100644
index 000000000000..10811e981d20
--- /dev/null
+++ b/arch/mn10300/kernel/mn10300-watchdog.c
@@ -0,0 +1,183 @@
+/* MN10300 Watchdog timer
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from arch/i386/kernel/nmi.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/nmi.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/intctl-regs.h>
+#include <asm/rtc-regs.h>
+#include <asm/div64.h>
+#include <asm/smp.h>
+#include <asm/gdb-stub.h>
+#include <asm/proc/clock.h>
+
+static DEFINE_SPINLOCK(watchdog_print_lock);
+static unsigned int watchdog;
+static unsigned int watchdog_hz = 1;
+unsigned int watchdog_alert_counter;
+
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+/*
+ * the best way to detect whether a CPU has a 'hard lockup' problem
+ * is to check its timer makes IRQ counts. If they are not
+ * changing then that CPU has some problem.
+ *
+ * as these watchdog NMI IRQs are generated on every CPU, we only
+ * have to check the current processor.
+ *
+ * since NMIs dont listen to _any_ locks, we have to be extremely
+ * careful not to rely on unsafe variables. The printk might lock
+ * up though, so we have to break up any console locks first ...
+ * [when there will be more tty-related locks, break them up
+ *  here too!]
+ */
+static unsigned int last_irq_sums[NR_CPUS];
+
+int __init check_watchdog(void)
+{
+	irq_cpustat_t tmp[1];
+
+	printk(KERN_INFO "Testing Watchdog... ");
+
+	memcpy(tmp, irq_stat, sizeof(tmp));
+	local_irq_enable();
+	mdelay((10 * 1000) / watchdog_hz); /* wait 10 ticks */
+	local_irq_disable();
+
+	if (nmi_count(0) - tmp[0].__nmi_count <= 5) {
+		printk(KERN_WARNING "CPU#%d: Watchdog appears to be stuck!\n",
+		       0);
+		return -1;
+	}
+
+	printk(KERN_INFO "OK.\n");
+
+	/* now that we know it works we can reduce NMI frequency to
+	 * something more reasonable; makes a difference in some configs
+	 */
+	watchdog_hz = 1;
+
+	return 0;
+}
+
+static int __init setup_watchdog(char *str)
+{
+	unsigned tmp;
+	int opt;
+	u8 ctr;
+
+	get_option(&str, &opt);
+	if (opt != 1)
+		return 0;
+
+	watchdog = opt;
+	if (watchdog) {
+		set_intr_stub(EXCEP_WDT, watchdog_handler);
+		ctr = WDCTR_WDCK_65536th;
+		WDCTR = WDCTR_WDRST | ctr;
+		WDCTR = ctr;
+		tmp = WDCTR;
+
+		tmp = __muldiv64u(1 << (16 + ctr * 2), 1000000, MN10300_WDCLK);
+		tmp = 1000000000 / tmp;
+		watchdog_hz = (tmp + 500) / 1000;
+	}
+
+	return 1;
+}
+
+__setup("watchdog=", setup_watchdog);
+
+void __init watchdog_go(void)
+{
+	u8 wdt;
+
+	if (watchdog) {
+		printk(KERN_INFO "Watchdog: running at %uHz\n", watchdog_hz);
+		wdt = WDCTR & ~WDCTR_WDCNE;
+		WDCTR = wdt | WDCTR_WDRST;
+		wdt = WDCTR;
+		WDCTR = wdt | WDCTR_WDCNE;
+		wdt = WDCTR;
+
+		check_watchdog();
+	}
+}
+
+asmlinkage
+void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep)
+{
+
+	/*
+	 * Since current-> is always on the stack, and we always switch
+	 * the stack NMI-atomically, it's safe to use smp_processor_id().
+	 */
+	int sum, cpu = smp_processor_id();
+	u8 wdt, tmp;
+
+	wdt = WDCTR & ~WDCTR_WDCNE;
+	WDCTR = wdt;
+	tmp = WDCTR;
+	NMICR = NMICR_WDIF;
+
+	nmi_count(cpu)++;
+	kstat_this_cpu.irqs[NMIIRQ]++;
+	sum = irq_stat[cpu].__irq_count;
+
+	if (last_irq_sums[cpu] == sum) {
+		/*
+		 * Ayiee, looks like this CPU is stuck ...
+		 * wait a few IRQs (5 seconds) before doing the oops ...
+		 */
+		watchdog_alert_counter++;
+		if (watchdog_alert_counter == 5 * watchdog_hz) {
+			spin_lock(&watchdog_print_lock);
+			/*
+			 * We are in trouble anyway, lets at least try
+			 * to get a message out.
+			 */
+			bust_spinlocks(1);
+			printk(KERN_ERR
+			       "NMI Watchdog detected LOCKUP on CPU%d,"
+			       " pc %08lx, registers:\n",
+			       cpu, regs->pc);
+			show_registers(regs);
+			printk("console shuts up ...\n");
+			console_silent();
+			spin_unlock(&watchdog_print_lock);
+			bust_spinlocks(0);
+#ifdef CONFIG_GDBSTUB
+			if (gdbstub_busy)
+				gdbstub_exception(regs, excep);
+			else
+				gdbstub_intercept(regs, excep);
+#endif
+			do_exit(SIGSEGV);
+		}
+	} else {
+		last_irq_sums[cpu] = sum;
+		watchdog_alert_counter = 0;
+	}
+
+	WDCTR = wdt | WDCTR_WDRST;
+	tmp = WDCTR;
+	WDCTR = wdt | WDCTR_WDCNE;
+	tmp = WDCTR;
+}
diff --git a/arch/mn10300/kernel/mn10300_ksyms.c b/arch/mn10300/kernel/mn10300_ksyms.c
new file mode 100644
index 000000000000..6d19628634e3
--- /dev/null
+++ b/arch/mn10300/kernel/mn10300_ksyms.c
@@ -0,0 +1,37 @@
+/* MN10300 Miscellaneous and library kernel exports
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+
+EXPORT_SYMBOL(change_bit);
+EXPORT_SYMBOL(test_and_change_bit);
+
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memset);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__generic_copy_from_user);
+EXPORT_SYMBOL(__generic_copy_to_user);
+EXPORT_SYMBOL(strnlen_user);
+
+extern u64 __ashrdi3(u64, unsigned);
+extern u64 __ashldi3(u64, unsigned);
+extern u64 __lshrdi3(u64, unsigned);
+extern s64 __negdi2(s64);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__negdi2);
diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c
new file mode 100644
index 000000000000..0e4d2f6fa6e8
--- /dev/null
+++ b/arch/mn10300/kernel/module.c
@@ -0,0 +1,206 @@
+/* MN10300 Kernel module helper routines
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by Mark Salter (msalter@redhat.com)
+ * - Derived from arch/i386/kernel/module.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public Licence as published by
+ * the Free Software Foundation; either version 2 of the Licence, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public Licence for more details.
+ *
+ * You should have received a copy of the GNU General Public Licence
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt, ...)
+#endif
+
+/*
+ * allocate storage for a module
+ */
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc_exec(size);
+}
+
+/*
+ * free memory returned from module_alloc()
+ */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	 * table entries. */
+}
+
+/*
+ * allow the arch to fix up the section table
+ * - we don't need anything special
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+static uint32_t reloc_get16(uint8_t *p)
+{
+	return p[0] | (p[1] << 8);
+}
+
+static uint32_t reloc_get24(uint8_t *p)
+{
+	return reloc_get16(p) | (p[2] << 16);
+}
+
+static uint32_t reloc_get32(uint8_t *p)
+{
+	return reloc_get16(p) | (reloc_get16(p+2) << 16);
+}
+
+static void reloc_put16(uint8_t *p, uint32_t val)
+{
+	p[0] = val & 0xff;
+	p[1] = (val >> 8) & 0xff;
+}
+
+static void reloc_put24(uint8_t *p, uint32_t val)
+{
+	reloc_put16(p, val);
+	p[2] = (val >> 16) & 0xff;
+}
+
+static void reloc_put32(uint8_t *p, uint32_t val)
+{
+	reloc_put16(p, val);
+	reloc_put16(p+2, val >> 16);
+}
+
+/*
+ * apply a REL relocation
+ */
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	printk(KERN_ERR "module %s: RELOCATION unsupported\n",
+	       me->name);
+	return -ENOEXEC;
+}
+
+/*
+ * apply a RELA relocation
+ */
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	Elf32_Addr relocation;
+	uint8_t *location;
+	uint32_t value;
+
+	DEBUGP("Applying relocate section %u to %u\n",
+	       relsec, sechdrs[relsec].sh_info);
+
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* this is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+
+		/* this is the symbol the relocation is referring to (note that
+		 * all undefined symbols have been resolved by the caller) */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		/* this is the adjustment to be made */
+		relocation = sym->st_value + rel[i].r_addend;
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+			/* for the first four relocation types, we add the
+			 * adjustment into the value at the location given */
+		case R_MN10300_32:
+			value = reloc_get32(location);
+			value += relocation;
+			reloc_put32(location, value);
+			break;
+		case R_MN10300_24:
+			value = reloc_get24(location);
+			value += relocation;
+			reloc_put24(location, value);
+			break;
+		case R_MN10300_16:
+			value = reloc_get16(location);
+			value += relocation;
+			reloc_put16(location, value);
+			break;
+		case R_MN10300_8:
+			*location += relocation;
+			break;
+
+			/* for the next three relocation types, we write the
+			 * adjustment with the address subtracted over the
+			 * value at the location given */
+		case R_MN10300_PCREL32:
+			value = relocation - (uint32_t) location;
+			reloc_put32(location, value);
+			break;
+		case R_MN10300_PCREL16:
+			value = relocation - (uint32_t) location;
+			reloc_put16(location, value);
+			break;
+		case R_MN10300_PCREL8:
+			*location = relocation - (uint32_t) location;
+			break;
+
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+/*
+ * finish loading the module
+ */
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	return 0;
+}
+
+/*
+ * finish clearing the module
+ */
+void module_arch_cleanup(struct module *mod)
+{
+}
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
new file mode 100644
index 000000000000..3b0d579fc15d
--- /dev/null
+++ b/arch/mn10300/kernel/process.c
@@ -0,0 +1,297 @@
+/* MN10300  Process handling code
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/percpu.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/mmu_context.h>
+#include <asm/fpu.h>
+#include <asm/reset-regs.h>
+#include <asm/gdb-stub.h>
+#include "internal.h"
+
+/*
+ * power management idle function, if any..
+ */
+void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
+
+/*
+ * return saved PC of a blocked thread.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	return ((unsigned long *) tsk->thread.sp)[3];
+}
+
+/*
+ * power off function, if any
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+/*
+ * we use this if we don't have any better idle routine
+ */
+static void default_idle(void)
+{
+	local_irq_disable();
+	if (!need_resched())
+		safe_halt();
+	else
+		local_irq_enable();
+}
+
+/*
+ * the idle thread
+ * - there's no useful work to be done, so just try to conserve power and have
+ *   a low exit latency (ie sit in a loop waiting for somebody to say that
+ *   they'd like to reschedule)
+ */
+void cpu_idle(void)
+{
+	int cpu = smp_processor_id();
+
+	/* endless idle loop with no priority at all */
+	for (;;) {
+		while (!need_resched()) {
+			void (*idle)(void);
+
+			smp_rmb();
+			idle = pm_idle;
+			if (!idle)
+				idle = default_idle;
+
+			irq_stat[cpu].idle_timestamp = jiffies;
+			idle();
+		}
+
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+	}
+}
+
+void release_segments(struct mm_struct *mm)
+{
+}
+
+void machine_restart(char *cmd)
+{
+#ifdef CONFIG_GDBSTUB
+	gdbstub_exit(0);
+#endif
+
+#ifdef mn10300_unit_hard_reset
+	mn10300_unit_hard_reset();
+#else
+	mn10300_proc_hard_reset();
+#endif
+}
+
+void machine_halt(void)
+{
+#ifdef CONFIG_GDBSTUB
+	gdbstub_exit(0);
+#endif
+}
+
+void machine_power_off(void)
+{
+#ifdef CONFIG_GDBSTUB
+	gdbstub_exit(0);
+#endif
+}
+
+void show_regs(struct pt_regs *regs)
+{
+}
+
+/*
+ * create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+
+	regs.a2 = (unsigned long) fn;
+	regs.d2 = (unsigned long) arg;
+	regs.pc = (unsigned long) kernel_thread_helper;
+	local_save_flags(regs.epsw);
+	regs.epsw |= EPSW_IE | EPSW_IM_7;
+
+	/* Ok, create the new process.. */
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0,
+		       NULL, NULL);
+}
+
+/*
+ * free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+	exit_fpu();
+}
+
+void flush_thread(void)
+{
+	flush_fpu();
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
+{
+}
+
+/*
+ * this gets called before we allocate a new thread and copy the current task
+ * into it so that we can store lazy state into memory
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+	unlazy_fpu(tsk);
+}
+
+/*
+ * set up the kernel stack for a new thread and copy arch-specific thread
+ * control information
+ */
+int copy_thread(int nr, unsigned long clone_flags,
+		unsigned long c_usp, unsigned long ustk_size,
+		struct task_struct *p, struct pt_regs *kregs)
+{
+	struct pt_regs *c_uregs, *c_kregs, *uregs;
+	unsigned long c_ksp;
+
+	uregs = current->thread.uregs;
+
+	c_ksp = (unsigned long) task_stack_page(p) + THREAD_SIZE;
+
+	/* allocate the userspace exception frame and set it up */
+	c_ksp -= sizeof(struct pt_regs);
+	c_uregs = (struct pt_regs *) c_ksp;
+
+	p->thread.uregs = c_uregs;
+	*c_uregs = *uregs;
+	c_uregs->sp = c_usp;
+	c_uregs->epsw &= ~EPSW_FE; /* my FPU */
+
+	c_ksp -= 12; /* allocate function call ABI slack */
+
+	/* the new TLS pointer is passed in as arg #5 to sys_clone() */
+	if (clone_flags & CLONE_SETTLS)
+		c_uregs->e2 = __frame->d3;
+
+	/* set up the return kernel frame if called from kernel_thread() */
+	c_kregs = c_uregs;
+	if (kregs != uregs) {
+		c_ksp -= sizeof(struct pt_regs);
+		c_kregs = (struct pt_regs *) c_ksp;
+		*c_kregs = *kregs;
+		c_kregs->sp = c_usp;
+		c_kregs->next = c_uregs;
+#ifdef CONFIG_MN10300_CURRENT_IN_E2
+		c_kregs->e2 = (unsigned long) p; /* current */
+#endif
+
+		c_ksp -= 12; /* allocate function call ABI slack */
+	}
+
+	/* set up things up so the scheduler can start the new task */
+	p->thread.__frame = c_kregs;
+	p->thread.a3	= (unsigned long) c_kregs;
+	p->thread.sp	= c_ksp;
+	p->thread.pc	= (unsigned long) ret_from_fork;
+	p->thread.wchan	= (unsigned long) ret_from_fork;
+	p->thread.usp	= c_usp;
+
+	return 0;
+}
+
+/*
+ * clone a process
+ * - tlsptr is retrieved by copy_thread() from __frame->d3
+ */
+asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
+			  int __user *parent_tidptr, int __user *child_tidptr,
+			  int __user *tlsptr)
+{
+	return do_fork(clone_flags, newsp ?: __frame->sp, __frame, 0,
+		       parent_tidptr, child_tidptr);
+}
+
+asmlinkage long sys_fork(void)
+{
+	return do_fork(SIGCHLD, __frame->sp, __frame, 0, NULL, NULL);
+}
+
+asmlinkage long sys_vfork(void)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, __frame->sp, __frame,
+		       0, NULL, NULL);
+}
+
+asmlinkage long sys_execve(char __user *name,
+			   char __user * __user *argv,
+			   char __user * __user *envp)
+{
+	char *filename;
+	int error;
+
+	lock_kernel();
+
+	filename = getname(name);
+	error = PTR_ERR(filename);
+	if (!IS_ERR(filename)) {
+		error = do_execve(filename, argv, envp, __frame);
+		if (error == 0)
+			current->ptrace &= ~PT_DTRACE;
+
+		putname(filename);
+	}
+
+	unlock_kernel();
+	return error;
+}
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	return p->thread.wchan;
+}
diff --git a/arch/mn10300/kernel/profile-low.S b/arch/mn10300/kernel/profile-low.S
new file mode 100644
index 000000000000..94ffac12d02d
--- /dev/null
+++ b/arch/mn10300/kernel/profile-low.S
@@ -0,0 +1,72 @@
+###############################################################################
+#
+# Fast profiling interrupt handler
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/intctl-regs.h>
+#include <asm/timer-regs.h>
+
+#define pi break
+
+	.balign	4
+counter:
+	.long	-1
+
+###############################################################################
+#
+# Profiling interrupt entry point
+# - intended to run at interrupt priority 1
+#
+###############################################################################
+ENTRY(profile_handler)
+	movm	[d2,d3,a2],(sp)
+
+	# ignore userspace
+	mov	(12,sp),d2
+	and	EPSW_nSL,d2
+	bne	out
+
+	# do nothing if there's no buffer
+	mov	(prof_buffer),a2
+	and	a2,a2
+	beq	out
+	or	0x20000000,a2
+
+	# calculate relative position in text segment
+	mov	(16,sp),d2
+	sub	_stext,d2
+	mov	(prof_shift),d3
+	lsr	d3,d2
+	mov	(prof_len),d3
+	cmp	d3,d2
+	bcc	outside_text
+
+	# increment the appropriate profile bucket
+do_inc:
+	asl2	d2
+	mov	(a2,d2),d3
+	inc	d3
+	mov	d3,(a2,d2)
+out:
+	mov	GxICR_DETECT,d2
+	movbu	d2,(TM11ICR)		# ACK the interrupt
+	movbu	(TM11ICR),d2
+	movm	(sp),[d2,d3,a2]
+	rti
+
+outside_text:
+	sub	1,d3
+	mov	d3,d2
+	bra	do_inc
diff --git a/arch/mn10300/kernel/profile.c b/arch/mn10300/kernel/profile.c
new file mode 100644
index 000000000000..20d7d0306b16
--- /dev/null
+++ b/arch/mn10300/kernel/profile.c
@@ -0,0 +1,51 @@
+/* MN10300 Profiling setup
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+/*
+ * initialise the profiling if enabled
+ * - using with gdbstub will give anomalous results
+ * - can't be used with gdbstub if running at IRQ priority 0
+ */
+static __init int profile_init(void)
+{
+	u16 tmp;
+
+	if (!prof_buffer)
+		return 0;
+
+	/* use timer 11 to drive the profiling interrupts */
+	set_intr_stub(EXCEP_IRQ_LEVEL0, profile_handler);
+
+	/* set IRQ priority at which to run */
+	set_intr_level(TM11IRQ, GxICR_LEVEL_0);
+
+	/* set up timer 11
+	 * - source: (IOCLK 33MHz)*2 = 66MHz
+	 * - frequency: (33330000*2) / 8 / 20625 = 202Hz
+	 */
+	TM11BR = 20625 - 1;
+	TM11MD = TM8MD_SRC_IOCLK_8;
+	TM11MD |= TM8MD_INIT_COUNTER;
+	TM11MD &= ~TM8MD_INIT_COUNTER;
+	TM11MD |= TM8MD_COUNT_ENABLE;
+
+	TM11ICR |= GxICR_ENABLE;
+	tmp = TM11ICR;
+
+	printk(KERN_INFO "Profiling initiated on timer 11, priority 0, %uHz\n",
+	       mn10300_ioclk / 8 / (TM11BR + 1));
+	printk(KERN_INFO "Profile histogram stored %p-%p\n",
+	       prof_buffer, (u8 *)(prof_buffer + prof_len) - 1);
+
+	return 0;
+}
+
+__initcall(profile_init);
diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c
new file mode 100644
index 000000000000..d6d6cdc75c52
--- /dev/null
+++ b/arch/mn10300/kernel/ptrace.c
@@ -0,0 +1,379 @@
+/* MN10300 Process tracing
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * translate ptrace register IDs into struct pt_regs offsets
+ */
+static const u8 ptrace_regid_to_frame[] = {
+	[PT_A3 << 2]		= REG_A3,
+	[PT_A2 << 2]		= REG_A2,
+	[PT_D3 << 2]		= REG_D3,
+	[PT_D2 << 2]		= REG_D2,
+	[PT_MCVF << 2]		= REG_MCVF,
+	[PT_MCRL << 2]		= REG_MCRL,
+	[PT_MCRH << 2]		= REG_MCRH,
+	[PT_MDRQ << 2]		= REG_MDRQ,
+	[PT_E1 << 2]		= REG_E1,
+	[PT_E0 << 2]		= REG_E0,
+	[PT_E7 << 2]		= REG_E7,
+	[PT_E6 << 2]		= REG_E6,
+	[PT_E5 << 2]		= REG_E5,
+	[PT_E4 << 2]		= REG_E4,
+	[PT_E3 << 2]		= REG_E3,
+	[PT_E2 << 2]		= REG_E2,
+	[PT_SP << 2]		= REG_SP,
+	[PT_LAR << 2]		= REG_LAR,
+	[PT_LIR << 2]		= REG_LIR,
+	[PT_MDR << 2]		= REG_MDR,
+	[PT_A1 << 2]		= REG_A1,
+	[PT_A0 << 2]		= REG_A0,
+	[PT_D1 << 2]		= REG_D1,
+	[PT_D0 << 2]		= REG_D0,
+	[PT_ORIG_D0 << 2]	= REG_ORIG_D0,
+	[PT_EPSW << 2]		= REG_EPSW,
+	[PT_PC << 2]		= REG_PC,
+};
+
+static inline int get_stack_long(struct task_struct *task, int offset)
+{
+	return *(unsigned long *)
+		((unsigned long) task->thread.uregs + offset);
+}
+
+/*
+ * this routine will put a word on the processes privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline
+int put_stack_long(struct task_struct *task, int offset, unsigned long data)
+{
+	unsigned long stack;
+
+	stack = (unsigned long) task->thread.uregs + offset;
+	*(unsigned long *) stack = data;
+	return 0;
+}
+
+static inline unsigned long get_fpregs(struct fpu_state_struct *buf,
+				       struct task_struct *tsk)
+{
+	return __copy_to_user(buf, &tsk->thread.fpu_state,
+			      sizeof(struct fpu_state_struct));
+}
+
+static inline unsigned long set_fpregs(struct task_struct *tsk,
+				       struct fpu_state_struct *buf)
+{
+	return __copy_from_user(&tsk->thread.fpu_state, buf,
+				sizeof(struct fpu_state_struct));
+}
+
+static inline void fpsave_init(struct task_struct *task)
+{
+	memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct));
+}
+
+/*
+ * make sure the single step bit is not set
+ */
+void ptrace_disable(struct task_struct *child)
+{
+#ifndef CONFIG_MN10300_USING_JTAG
+	struct user *dummy = NULL;
+	long tmp;
+
+	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
+	tmp &= ~EPSW_T;
+	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
+#endif
+}
+
+/*
+ * set the single step bit
+ */
+void ptrace_enable(struct task_struct *child)
+{
+#ifndef CONFIG_MN10300_USING_JTAG
+	struct user *dummy = NULL;
+	long tmp;
+
+	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
+	tmp |= EPSW_T;
+	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
+#endif
+}
+
+/*
+ * handle the arch-specific side of process tracing
+ */
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	struct fpu_state_struct fpu_state;
+	int i, ret;
+
+	switch (request) {
+	/* read the word at location addr. */
+	case PTRACE_PEEKTEXT: {
+		unsigned long tmp;
+		int copied;
+
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		ret = -EIO;
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, (unsigned long *) data);
+		break;
+	}
+
+	/* read the word at location addr. */
+	case PTRACE_PEEKDATA: {
+		unsigned long tmp;
+		int copied;
+
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		ret = -EIO;
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, (unsigned long *) data);
+		break;
+	}
+
+	/* read the word at location addr in the USER area. */
+	case PTRACE_PEEKUSR: {
+		unsigned long tmp;
+
+		ret = -EIO;
+		if ((addr & 3) || addr < 0 ||
+		    addr > sizeof(struct user) - 3)
+			break;
+
+		tmp = 0;  /* Default return condition */
+		if (addr < NR_PTREGS << 2)
+			tmp = get_stack_long(child,
+					     ptrace_regid_to_frame[addr]);
+		ret = put_user(tmp, (unsigned long *) data);
+		break;
+	}
+
+	/* write the word at location addr. */
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA:
+		if (access_process_vm(child, addr, &data, sizeof(data), 1) ==
+		    sizeof(data))
+			ret = 0;
+		else
+			ret = -EIO;
+		break;
+
+		/* write the word at location addr in the USER area */
+	case PTRACE_POKEUSR:
+		ret = -EIO;
+		if ((addr & 3) || addr < 0 ||
+		    addr > sizeof(struct user) - 3)
+			break;
+
+		ret = 0;
+		if (addr < NR_PTREGS << 2)
+			ret = put_stack_long(child, ptrace_regid_to_frame[addr],
+					     data);
+		break;
+
+		/* continue and stop at next (return from) syscall */
+	case PTRACE_SYSCALL:
+		/* restart after signal. */
+	case PTRACE_CONT:
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		if (request == PTRACE_SYSCALL)
+			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		else
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		child->exit_code = data;
+		ptrace_disable(child);
+		wake_up_process(child);
+		ret = 0;
+		break;
+
+		/*
+		 * make the child exit
+		 * - the best I can do is send it a sigkill
+		 * - perhaps it should be put in the status that it wants to
+		 *   exit
+		 */
+	case PTRACE_KILL:
+		ret = 0;
+		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
+			break;
+		child->exit_code = SIGKILL;
+		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+		ptrace_disable(child);
+		wake_up_process(child);
+		break;
+
+	case PTRACE_SINGLESTEP: /* set the trap flag. */
+#ifndef CONFIG_MN10300_USING_JTAG
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		ptrace_enable(child);
+		child->exit_code = data;
+		wake_up_process(child);
+		ret = 0;
+#else
+		ret = -EINVAL;
+#endif
+		break;
+
+	case PTRACE_DETACH:	/* detach a process that was attached. */
+		ret = ptrace_detach(child, data);
+		break;
+
+		/* Get all gp regs from the child. */
+	case PTRACE_GETREGS: {
+		unsigned long tmp;
+
+		if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) {
+			ret = -EIO;
+			break;
+		}
+
+		for (i = 0; i < NR_PTREGS << 2; i += 4) {
+			tmp = get_stack_long(child, ptrace_regid_to_frame[i]);
+			__put_user(tmp, (unsigned long *) data);
+			data += sizeof(tmp);
+		}
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+		unsigned long tmp;
+
+		if (!access_ok(VERIFY_READ, (unsigned long *)data,
+			       sizeof(struct pt_regs))) {
+			ret = -EIO;
+			break;
+		}
+
+		for (i = 0; i < NR_PTREGS << 2; i += 4) {
+			__get_user(tmp, (unsigned long *) data);
+			put_stack_long(child, ptrace_regid_to_frame[i], tmp);
+			data += sizeof(tmp);
+		}
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
+		if (is_using_fpu(child)) {
+			unlazy_fpu(child);
+			fpu_state = child->thread.fpu_state;
+		} else {
+			memset(&fpu_state, 0, sizeof(fpu_state));
+		}
+
+		ret = -EIO;
+		if (copy_to_user((void *) data, &fpu_state,
+				 sizeof(fpu_state)) == 0)
+			ret = 0;
+		break;
+	}
+
+	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
+		ret = -EFAULT;
+		if (copy_from_user(&fpu_state, (const void *) data,
+				   sizeof(fpu_state)) == 0) {
+			fpu_kill_state(child);
+			child->thread.fpu_state = fpu_state;
+			set_using_fpu(child);
+			ret = 0;
+		}
+		break;
+	}
+
+	case PTRACE_SETOPTIONS: {
+		if (data & PTRACE_O_TRACESYSGOOD)
+			child->ptrace |= PT_TRACESYSGOOD;
+		else
+			child->ptrace &= ~PT_TRACESYSGOOD;
+		ret = 0;
+		break;
+	}
+
+	default:
+		ret = -EIO;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * notification of system call entry/exit
+ * - triggered by current->work.syscall_trace
+ */
+asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+{
+#if 0
+	/* just in case... */
+	printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n",
+	       current->pid,
+	       regs->orig_d0,
+	       regs->a0,
+	       regs->d1,
+	       regs->a3,
+	       regs->a2,
+	       regs->d0);
+	return;
+#endif
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE) &&
+	    !test_thread_flag(TIF_SINGLESTEP))
+		return;
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	/* the 0x80 provides a way for the tracing parent to distinguish
+	   between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP |
+		      ((current->ptrace & PT_TRACESYSGOOD) &&
+		       !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
+
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
diff --git a/arch/mn10300/kernel/rtc.c b/arch/mn10300/kernel/rtc.c
new file mode 100644
index 000000000000..042f792d8430
--- /dev/null
+++ b/arch/mn10300/kernel/rtc.c
@@ -0,0 +1,173 @@
+/* MN10300 RTC management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bcd.h>
+#include <linux/timex.h>
+#include <asm/rtc-regs.h>
+#include <asm/rtc.h>
+
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+
+/* last time the RTC got updated */
+static long last_rtc_update;
+
+/* time for RTC to update itself in ioclks */
+static unsigned long mn10300_rtc_update_period;
+
+/*
+ * read the current RTC time
+ */
+unsigned long __init get_initial_rtc_time(void)
+{
+	struct rtc_time tm;
+
+	get_rtc_time(&tm);
+
+	return mktime(tm.tm_year, tm.tm_mon, tm.tm_mday,
+		      tm.tm_hour, tm.tm_min, tm.tm_sec);
+}
+
+/*
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
+ * ms after the second nowtime has started, because when nowtime is written
+ * into the registers of the CMOS clock, it will jump to the next second
+ * precisely 500 ms later.  Check the Motorola MC146818A or Dallas DS12887 data
+ * sheet for details.
+ *
+ * BUG: This routine does not handle hour overflow properly; it just
+ *      sets the minutes. Usually you'll only notice that after reboot!
+ */
+static int set_rtc_mmss(unsigned long nowtime)
+{
+	unsigned char save_control, save_freq_select;
+	int retval = 0;
+	int real_seconds, real_minutes, cmos_minutes;
+
+	/* gets recalled with irq locally disabled */
+	spin_lock(&rtc_lock);
+	save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being
+						* set */
+	CMOS_WRITE(save_control | RTC_SET, RTC_CONTROL);
+
+	save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset
+							* prescaler */
+	CMOS_WRITE(save_freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
+
+	cmos_minutes = CMOS_READ(RTC_MINUTES);
+	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+		BCD_TO_BIN(cmos_minutes);
+
+	/*
+	 * since we're only adjusting minutes and seconds,
+	 * don't interfere with hour overflow. This avoids
+	 * messing with unknown time zones but requires your
+	 * RTC not to be off by more than 15 minutes
+	 */
+	real_seconds = nowtime % 60;
+	real_minutes = nowtime / 60;
+	if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
+		/* correct for half hour time zone */
+		real_minutes += 30;
+	real_minutes %= 60;
+
+	if (abs(real_minutes - cmos_minutes) < 30) {
+		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+			BIN_TO_BCD(real_seconds);
+			BIN_TO_BCD(real_minutes);
+		}
+		CMOS_WRITE(real_seconds, RTC_SECONDS);
+		CMOS_WRITE(real_minutes, RTC_MINUTES);
+	} else {
+		printk(KERN_WARNING
+		       "set_rtc_mmss: can't update from %d to %d\n",
+		       cmos_minutes, real_minutes);
+		retval = -1;
+	}
+
+	/* The following flags have to be released exactly in this order,
+	 * otherwise the DS12887 (popular MC146818A clone with integrated
+	 * battery and quartz) will not reset the oscillator and will not
+	 * update precisely 500 ms later. You won't find this mentioned in
+	 * the Dallas Semiconductor data sheets, but who believes data
+	 * sheets anyway ...                           -- Markus Kuhn
+	 */
+	CMOS_WRITE(save_control, RTC_CONTROL);
+	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+	spin_unlock(&rtc_lock);
+
+	return retval;
+}
+
+void check_rtc_time(void)
+{
+	/* the RTC clock just finished ticking over again this second
+	 * - if we have an externally synchronized Linux clock, then update
+	 *   RTC clock accordingly every ~11 minutes. set_rtc_mmss() has to be
+	 *   called as close as possible to 500 ms before the new second starts.
+	 */
+	if ((time_status & STA_UNSYNC) == 0 &&
+	    xtime.tv_sec > last_rtc_update + 660 &&
+	    xtime.tv_nsec / 1000 >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
+	    xtime.tv_nsec / 1000 <= 500000 + ((unsigned) TICK_SIZE) / 2
+	    ) {
+		if (set_rtc_mmss(xtime.tv_sec) == 0)
+			last_rtc_update = xtime.tv_sec;
+		else
+			/* do it again in 60s */
+			last_rtc_update = xtime.tv_sec - 600;
+	}
+}
+
+/*
+ * calibrate the TSC clock against the RTC
+ */
+void __init calibrate_clock(void)
+{
+	unsigned long count0, counth, count1;
+	unsigned char status;
+
+	/* make sure the RTC is running and is set to operate in 24hr mode */
+	status = RTSRC;
+	RTCRB |= RTCRB_SET;
+	RTCRB |= RTCRB_TM_24HR;
+	RTCRA |= RTCRA_DVR;
+	RTCRA &= ~RTCRA_DVR;
+	RTCRB &= ~RTCRB_SET;
+
+	/* work out the clock speed by counting clock cycles between ends of
+	 * the RTC update cycle - track the RTC through one complete update
+	 * cycle (1 second)
+	 */
+	startup_timestamp_counter();
+
+	while (!(RTCRA & RTCRA_UIP)) {}
+	while ((RTCRA & RTCRA_UIP)) {}
+
+	count0 = TMTSCBC;
+
+	while (!(RTCRA & RTCRA_UIP)) {}
+
+	counth = TMTSCBC;
+
+	while ((RTCRA & RTCRA_UIP)) {}
+
+	count1 = TMTSCBC;
+
+	shutdown_timestamp_counter();
+
+	MN10300_TSCCLK = count0 - count1; /* the timers count down */
+	mn10300_rtc_update_period = counth - count1;
+	MN10300_TSC_PER_HZ = MN10300_TSCCLK / HZ;
+}
diff --git a/arch/mn10300/kernel/semaphore.c b/arch/mn10300/kernel/semaphore.c
new file mode 100644
index 000000000000..9153c4039fd2
--- /dev/null
+++ b/arch/mn10300/kernel/semaphore.c
@@ -0,0 +1,149 @@
+/* MN10300 Semaphore implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <asm/semaphore.h>
+
+struct sem_waiter {
+	struct list_head	list;
+	struct task_struct	*task;
+};
+
+#if SEMAPHORE_DEBUG
+void semtrace(struct semaphore *sem, const char *str)
+{
+	if (sem->debug)
+		printk(KERN_DEBUG "[%d] %s({%d,%d})\n",
+		       current->pid,
+		       str,
+		       atomic_read(&sem->count),
+		       list_empty(&sem->wait_list) ? 0 : 1);
+}
+#else
+#define semtrace(SEM, STR) do { } while (0)
+#endif
+
+/*
+ * wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __down(struct semaphore *sem, unsigned long flags)
+{
+	struct task_struct *tsk = current;
+	struct sem_waiter waiter;
+
+	semtrace(sem, "Entering __down");
+
+	/* set up my own style of waitqueue */
+	waiter.task = tsk;
+	get_task_struct(tsk);
+
+	list_add_tail(&waiter.list, &sem->wait_list);
+
+	/* we don't need to touch the semaphore struct anymore */
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+	/* wait to be given the semaphore */
+	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+	for (;;) {
+		if (!waiter.task)
+			break;
+		schedule();
+		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	}
+
+	tsk->state = TASK_RUNNING;
+	semtrace(sem, "Leaving __down");
+}
+EXPORT_SYMBOL(__down);
+
+/*
+ * interruptibly wait for a token to be granted from a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+int __down_interruptible(struct semaphore *sem, unsigned long flags)
+{
+	struct task_struct *tsk = current;
+	struct sem_waiter waiter;
+	int ret;
+
+	semtrace(sem, "Entering __down_interruptible");
+
+	/* set up my own style of waitqueue */
+	waiter.task = tsk;
+	get_task_struct(tsk);
+
+	list_add_tail(&waiter.list, &sem->wait_list);
+
+	/* we don't need to touch the semaphore struct anymore */
+	set_task_state(tsk, TASK_INTERRUPTIBLE);
+
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+	/* wait to be given the semaphore */
+	ret = 0;
+	for (;;) {
+		if (!waiter.task)
+			break;
+		if (unlikely(signal_pending(current)))
+			goto interrupted;
+		schedule();
+		set_task_state(tsk, TASK_INTERRUPTIBLE);
+	}
+
+ out:
+	tsk->state = TASK_RUNNING;
+	semtrace(sem, "Leaving __down_interruptible");
+	return ret;
+
+ interrupted:
+	spin_lock_irqsave(&sem->wait_lock, flags);
+	list_del(&waiter.list);
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+	ret = 0;
+	if (!waiter.task) {
+		put_task_struct(current);
+		ret = -EINTR;
+	}
+	goto out;
+}
+EXPORT_SYMBOL(__down_interruptible);
+
+/*
+ * release a single token back to a semaphore
+ * - entered with lock held and interrupts disabled
+ */
+void __up(struct semaphore *sem)
+{
+	struct task_struct *tsk;
+	struct sem_waiter *waiter;
+
+	semtrace(sem, "Entering __up");
+
+	/* grant the token to the process at the front of the queue */
+	waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
+
+	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
+	 * It is an allocated on the waiter's stack and may become invalid at
+	 * any time after that point (due to a wakeup from another source).
+	 */
+	list_del_init(&waiter->list);
+	tsk = waiter->task;
+	smp_mb();
+	waiter->task = NULL;
+	wake_up_process(tsk);
+	put_task_struct(tsk);
+
+	semtrace(sem, "Leaving __up");
+}
+EXPORT_SYMBOL(__up);
diff --git a/arch/mn10300/kernel/setup.c b/arch/mn10300/kernel/setup.c
new file mode 100644
index 000000000000..6b7ce2636851
--- /dev/null
+++ b/arch/mn10300/kernel/setup.c
@@ -0,0 +1,298 @@
+/* MN10300 Arch-specific initialisation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <asm/processor.h>
+#include <linux/console.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/proc/proc.h>
+#include <asm/busctl-regs.h>
+#include <asm/fpu.h>
+#include <asm/sections.h>
+
+struct mn10300_cpuinfo boot_cpu_data;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x18000000;
+
+char redboot_command_line[COMMAND_LINE_SIZE] =
+	"console=ttyS0,115200 root=/dev/mtdblock3 rw";
+
+char __initdata redboot_platform_name[COMMAND_LINE_SIZE];
+
+static struct resource code_resource = {
+	.start	= 0x100000,
+	.end	= 0,
+	.name	= "Kernel code",
+};
+
+static struct resource data_resource = {
+	.start	= 0,
+	.end	= 0,
+	.name	= "Kernel data",
+};
+
+static unsigned long __initdata phys_memory_base;
+static unsigned long __initdata phys_memory_end;
+static unsigned long __initdata memory_end;
+unsigned long memory_size;
+
+struct thread_info *__current_ti = &init_thread_union.thread_info;
+struct task_struct *__current = &init_task;
+
+#define mn10300_known_cpus 3
+static const char *const mn10300_cputypes[] = {
+	"am33v1",
+	"am33v2",
+	"am34v1",
+	"unknown"
+};
+
+/*
+ *
+ */
+static void __init parse_mem_cmdline(char **cmdline_p)
+{
+	char *from, *to, c;
+
+	/* save unparsed command line copy for /proc/cmdline */
+	strcpy(boot_command_line, redboot_command_line);
+
+	/* see if there's an explicit memory size option */
+	from = redboot_command_line;
+	to = redboot_command_line;
+	c = ' ';
+
+	for (;;) {
+		if (c == ' ' && !memcmp(from, "mem=", 4)) {
+			if (to != redboot_command_line)
+				to--;
+			memory_size = memparse(from + 4, &from);
+		}
+
+		c = *(from++);
+		if (!c)
+			break;
+
+		*(to++) = c;
+	}
+
+	*to = '\0';
+	*cmdline_p = redboot_command_line;
+
+	if (memory_size == 0)
+		panic("Memory size not known\n");
+
+	memory_end = (unsigned long) CONFIG_KERNEL_RAM_BASE_ADDRESS +
+		memory_size;
+	if (memory_end > phys_memory_end)
+		memory_end = phys_memory_end;
+}
+
+/*
+ * architecture specific setup
+ */
+void __init setup_arch(char **cmdline_p)
+{
+	unsigned long bootmap_size;
+	unsigned long kstart_pfn, start_pfn, free_pfn, end_pfn;
+
+	cpu_init();
+	unit_setup();
+	parse_mem_cmdline(cmdline_p);
+
+	init_mm.start_code = (unsigned long)&_text;
+	init_mm.end_code = (unsigned long) &_etext;
+	init_mm.end_data = (unsigned long) &_edata;
+	init_mm.brk = (unsigned long) &_end;
+
+	code_resource.start = virt_to_bus(&_text);
+	code_resource.end = virt_to_bus(&_etext)-1;
+	data_resource.start = virt_to_bus(&_etext);
+	data_resource.end = virt_to_bus(&_edata)-1;
+
+#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
+
+	start_pfn = (CONFIG_KERNEL_RAM_BASE_ADDRESS >> PAGE_SHIFT);
+	kstart_pfn = PFN_UP(__pa(&_text));
+	free_pfn = PFN_UP(__pa(&_end));
+	end_pfn = PFN_DOWN(__pa(memory_end));
+
+	bootmap_size = init_bootmem_node(&contig_page_data,
+					 free_pfn,
+					 start_pfn,
+					 end_pfn);
+
+	if (kstart_pfn > start_pfn)
+		free_bootmem(PFN_PHYS(start_pfn),
+			     PFN_PHYS(kstart_pfn - start_pfn));
+
+	free_bootmem(PFN_PHYS(free_pfn),
+		     PFN_PHYS(end_pfn - free_pfn));
+
+	/* If interrupt vector table is in main ram, then we need to
+	   reserve the page it is occupying. */
+	if (CONFIG_INTERRUPT_VECTOR_BASE >= CONFIG_KERNEL_RAM_BASE_ADDRESS &&
+	    CONFIG_INTERRUPT_VECTOR_BASE < memory_end)
+		reserve_bootmem(CONFIG_INTERRUPT_VECTOR_BASE, 1,
+				BOOTMEM_DEFAULT);
+
+	reserve_bootmem(PAGE_ALIGN(PFN_PHYS(free_pfn)), bootmap_size,
+			BOOTMEM_DEFAULT);
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+	conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+#endif
+
+	paging_init();
+}
+
+/*
+ * perform CPU initialisation
+ */
+void __init cpu_init(void)
+{
+	unsigned long cpurev = CPUREV, type;
+	unsigned long base, size;
+
+	type = (CPUREV & CPUREV_TYPE) >> CPUREV_TYPE_S;
+	if (type > mn10300_known_cpus)
+		type = mn10300_known_cpus;
+
+	printk(KERN_INFO "Matsushita %s, rev %ld\n",
+	       mn10300_cputypes[type],
+	       (cpurev & CPUREV_REVISION) >> CPUREV_REVISION_S);
+
+	/* determine the memory size and base from the memory controller regs */
+	memory_size = 0;
+
+	base = SDBASE(0);
+	if (base & SDBASE_CE) {
+		size = (base & SDBASE_CBAM) << SDBASE_CBAM_SHIFT;
+		size = ~size + 1;
+		base &= SDBASE_CBA;
+
+		printk(KERN_INFO "SDRAM[0]: %luMb @%08lx\n", size >> 20, base);
+		memory_size += size;
+		phys_memory_base = base;
+	}
+
+	base = SDBASE(1);
+	if (base & SDBASE_CE) {
+		size = (base & SDBASE_CBAM) << SDBASE_CBAM_SHIFT;
+		size = ~size + 1;
+		base &= SDBASE_CBA;
+
+		printk(KERN_INFO "SDRAM[1]: %luMb @%08lx\n", size >> 20, base);
+		memory_size += size;
+		if (phys_memory_base == 0)
+			phys_memory_base = base;
+	}
+
+	phys_memory_end = phys_memory_base + memory_size;
+
+#ifdef CONFIG_FPU
+	fpu_init_state();
+#endif
+}
+
+/*
+ * Get CPU information for use by the procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	unsigned long cpurev = CPUREV, type, icachesz, dcachesz;
+
+	type = (CPUREV & CPUREV_TYPE) >> CPUREV_TYPE_S;
+	if (type > mn10300_known_cpus)
+		type = mn10300_known_cpus;
+
+	icachesz =
+		((cpurev & CPUREV_ICWAY ) >> CPUREV_ICWAY_S)  *
+		((cpurev & CPUREV_ICSIZE) >> CPUREV_ICSIZE_S) *
+		1024;
+
+	dcachesz =
+		((cpurev & CPUREV_DCWAY ) >> CPUREV_DCWAY_S)  *
+		((cpurev & CPUREV_DCSIZE) >> CPUREV_DCSIZE_S) *
+		1024;
+
+	seq_printf(m,
+		   "processor  : 0\n"
+		   "vendor_id  : Matsushita\n"
+		   "cpu core   : %s\n"
+		   "cpu rev    : %lu\n"
+		   "model name : " PROCESSOR_MODEL_NAME		"\n"
+		   "icache size: %lu\n"
+		   "dcache size: %lu\n",
+		   mn10300_cputypes[type],
+		   (cpurev & CPUREV_REVISION) >> CPUREV_REVISION_S,
+		   icachesz,
+		   dcachesz
+		   );
+
+	seq_printf(m,
+		   "ioclk speed: %lu.%02luMHz\n"
+		   "bogomips   : %lu.%02lu\n\n",
+		   MN10300_IOCLK / 1000000,
+		   (MN10300_IOCLK / 10000) % 100,
+		   loops_per_jiffy / (500000 / HZ),
+		   (loops_per_jiffy / (5000 / HZ)) % 100
+		   );
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
diff --git a/arch/mn10300/kernel/sigframe.h b/arch/mn10300/kernel/sigframe.h
new file mode 100644
index 000000000000..0decba28ae84
--- /dev/null
+++ b/arch/mn10300/kernel/sigframe.h
@@ -0,0 +1,33 @@
+/* MN10300 Signal frame definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+struct sigframe
+{
+	void (*pretcode)(void);
+	int sig;
+	struct sigcontext *psc;
+	struct sigcontext sc;
+	struct fpucontext fpuctx;
+	unsigned long extramask[_NSIG_WORDS-1];
+	char retcode[8];
+};
+
+struct rt_sigframe
+{
+	void (*pretcode)(void);
+	int sig;
+	struct siginfo *pinfo;
+	void *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	struct fpucontext fpuctx;
+	char retcode[8];
+};
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
new file mode 100644
index 000000000000..841ca9955a18
--- /dev/null
+++ b/arch/mn10300/kernel/signal.c
@@ -0,0 +1,564 @@
+/* MN10300 Signal handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <asm/cacheflush.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/fpu.h>
+#include "sigframe.h"
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+/*
+ * atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage long sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	current->saved_sigmask = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_thread_flag(TIF_RESTORE_SIGMASK);
+	return -ERESTARTNOHAND;
+}
+
+/*
+ * set signal action syscall
+ */
+asmlinkage long sys_sigaction(int sig,
+			      const struct old_sigaction __user *act,
+			      struct old_sigaction __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+		if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+
+/*
+ * set alternate signal stack syscall
+ */
+asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t *uoss)
+{
+	return do_sigaltstack(uss, uoss, __frame->sp);
+}
+
+/*
+ * do a signal return; undo the signal stack.
+ */
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc, long *_d0)
+{
+	unsigned int err = 0;
+
+	if (is_using_fpu(current))
+		fpu_kill_state(current);
+
+#define COPY(x) err |= __get_user(regs->x, &sc->x)
+	COPY(d1); COPY(d2); COPY(d3);
+	COPY(a0); COPY(a1); COPY(a2); COPY(a3);
+	COPY(e0); COPY(e1); COPY(e2); COPY(e3);
+	COPY(e4); COPY(e5); COPY(e6); COPY(e7);
+	COPY(lar); COPY(lir);
+	COPY(mdr); COPY(mdrq);
+	COPY(mcvf); COPY(mcrl); COPY(mcrh);
+	COPY(sp); COPY(pc);
+#undef COPY
+
+	{
+		unsigned int tmpflags;
+#ifndef CONFIG_MN10300_USING_JTAG
+#define USER_EPSW (EPSW_FLAG_Z | EPSW_FLAG_N | EPSW_FLAG_C | EPSW_FLAG_V | \
+		   EPSW_T | EPSW_nAR)
+#else
+#define USER_EPSW (EPSW_FLAG_Z | EPSW_FLAG_N | EPSW_FLAG_C | EPSW_FLAG_V | \
+		   EPSW_nAR)
+#endif
+		err |= __get_user(tmpflags, &sc->epsw);
+		regs->epsw = (regs->epsw & ~USER_EPSW) |
+		  (tmpflags & USER_EPSW);
+		regs->orig_d0 = -1;		/* disable syscall checks */
+	}
+
+	{
+		struct fpucontext *buf;
+		err |= __get_user(buf, &sc->fpucontext);
+		if (buf) {
+			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+				goto badframe;
+			err |= fpu_restore_sigcontext(buf);
+		}
+	}
+
+	err |= __get_user(*_d0, &sc->d0);
+	return err;
+
+badframe:
+	return 1;
+}
+
+/*
+ * standard signal return syscall
+ */
+asmlinkage long sys_sigreturn(void)
+{
+	struct sigframe __user *frame = (struct sigframe __user *) __frame->sp;
+	sigset_t set;
+	long d0;
+
+	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.oldmask))
+		goto badframe;
+
+	if (_NSIG_WORDS > 1 &&
+	    __copy_from_user(&set.sig[1], &frame->extramask,
+			     sizeof(frame->extramask)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(__frame, &frame->sc, &d0))
+		goto badframe;
+
+	return d0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * realtime signal return syscall
+ */
+asmlinkage long sys_rt_sigreturn(void)
+{
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *) __frame->sp;
+	sigset_t set;
+	unsigned long d0;
+
+	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(__frame, &frame->uc.uc_mcontext, &d0))
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, __frame->sp) == -EFAULT)
+		goto badframe;
+
+	return d0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * store the userspace context into a signal frame
+ */
+static int setup_sigcontext(struct sigcontext __user *sc,
+			    struct fpucontext *fpuctx,
+			    struct pt_regs *regs,
+			    unsigned long mask)
+{
+	int tmp, err = 0;
+
+#define COPY(x) err |= __put_user(regs->x, &sc->x)
+	COPY(d0); COPY(d1); COPY(d2); COPY(d3);
+	COPY(a0); COPY(a1); COPY(a2); COPY(a3);
+	COPY(e0); COPY(e1); COPY(e2); COPY(e3);
+	COPY(e4); COPY(e5); COPY(e6); COPY(e7);
+	COPY(lar); COPY(lir);
+	COPY(mdr); COPY(mdrq);
+	COPY(mcvf); COPY(mcrl); COPY(mcrh);
+	COPY(sp); COPY(epsw); COPY(pc);
+#undef COPY
+
+	tmp = fpu_setup_sigcontext(fpuctx);
+	if (tmp < 0)
+		err = 1;
+	else
+		err |= __put_user(tmp ? fpuctx : NULL, &sc->fpucontext);
+
+	/* non-iBCS2 extensions.. */
+	err |= __put_user(mask, &sc->oldmask);
+
+	return err;
+}
+
+/*
+ * determine which stack to use..
+ */
+static inline void __user *get_sigframe(struct k_sigaction *ka,
+					struct pt_regs *regs,
+					size_t frame_size)
+{
+	unsigned long sp;
+
+	/* default to using normal stack */
+	sp = regs->sp;
+
+	/* this is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (!on_sig_stack(sp))
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	return (void __user *) ((sp - frame_size) & ~7UL);
+}
+
+/*
+ * set up a normal signal frame
+ */
+static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+		       struct pt_regs *regs)
+{
+	struct sigframe __user *frame;
+	int rsig;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	rsig = sig;
+	if (sig < 32 &&
+	    current_thread_info()->exec_domain &&
+	    current_thread_info()->exec_domain->signal_invmap)
+		rsig = current_thread_info()->exec_domain->signal_invmap[sig];
+
+	if (__put_user(rsig, &frame->sig) < 0 ||
+	    __put_user(&frame->sc, &frame->psc) < 0)
+		goto give_sigsegv;
+
+	if (setup_sigcontext(&frame->sc, &frame->fpuctx, regs, set->sig[0]))
+		goto give_sigsegv;
+
+	if (_NSIG_WORDS > 1) {
+		if (__copy_to_user(frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
+			goto give_sigsegv;
+	}
+
+	/* set up to return from userspace.  If provided, use a stub already in
+	 * userspace */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		if (__put_user(ka->sa.sa_restorer, &frame->pretcode))
+			goto give_sigsegv;
+	} else {
+		if (__put_user((void (*)(void))frame->retcode,
+			       &frame->pretcode))
+			goto give_sigsegv;
+		/* this is mov $,d0; syscall 0 */
+		if (__put_user(0x2c, (char *)(frame->retcode + 0)) ||
+		    __put_user(__NR_sigreturn, (char *)(frame->retcode + 1)) ||
+		    __put_user(0x00, (char *)(frame->retcode + 2)) ||
+		    __put_user(0xf0, (char *)(frame->retcode + 3)) ||
+		    __put_user(0xe0, (char *)(frame->retcode + 4)))
+			goto give_sigsegv;
+		flush_icache_range((unsigned long) frame->retcode,
+				   (unsigned long) frame->retcode + 5);
+	}
+
+	/* set up registers for signal handler */
+	regs->sp = (unsigned long) frame;
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+	regs->d0 = sig;
+	regs->d1 = (unsigned long) &frame->sc;
+
+	set_fs(USER_DS);
+
+	/* the tracer may want to single-step inside the handler */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		ptrace_notify(SIGTRAP);
+
+#if DEBUG_SIG
+	printk(KERN_DEBUG "SIG deliver %d (%s:%d): sp=%p pc=%lx ra=%p\n",
+	       sig, current->comm, current->pid, frame, regs->pc,
+	       frame->pretcode);
+#endif
+
+	return 0;
+
+give_sigsegv:
+	force_sig(SIGSEGV, current);
+	return -EFAULT;
+}
+
+/*
+ * set up a realtime signal frame
+ */
+static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			  sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	int rsig;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	rsig = sig;
+	if (sig < 32 &&
+	    current_thread_info()->exec_domain &&
+	    current_thread_info()->exec_domain->signal_invmap)
+		rsig = current_thread_info()->exec_domain->signal_invmap[sig];
+
+	if (__put_user(rsig, &frame->sig) ||
+	    __put_user(&frame->info, &frame->pinfo) ||
+	    __put_user(&frame->uc, &frame->puc) ||
+	    copy_siginfo_to_user(&frame->info, info))
+		goto give_sigsegv;
+
+	/* create the ucontext.  */
+	if (__put_user(0, &frame->uc.uc_flags) ||
+	    __put_user(0, &frame->uc.uc_link) ||
+	    __put_user((void *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp) ||
+	    __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags) ||
+	    __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size) ||
+	    setup_sigcontext(&frame->uc.uc_mcontext,
+			     &frame->fpuctx, regs, set->sig[0]) ||
+	    __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)))
+		goto give_sigsegv;
+
+	/* set up to return from userspace.  If provided, use a stub already in
+	 * userspace */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		if (__put_user(ka->sa.sa_restorer, &frame->pretcode))
+			goto give_sigsegv;
+	} else {
+		if (__put_user((void(*)(void))frame->retcode,
+			       &frame->pretcode) ||
+		    /* This is mov $,d0; syscall 0 */
+		    __put_user(0x2c, (char *)(frame->retcode + 0)) ||
+		    __put_user(__NR_rt_sigreturn,
+			       (char *)(frame->retcode + 1)) ||
+		    __put_user(0x00, (char *)(frame->retcode + 2)) ||
+		    __put_user(0xf0, (char *)(frame->retcode + 3)) ||
+		    __put_user(0xe0, (char *)(frame->retcode + 4)))
+			goto give_sigsegv;
+
+		flush_icache_range((u_long) frame->retcode,
+				   (u_long) frame->retcode + 5);
+	}
+
+	/* Set up registers for signal handler */
+	regs->sp = (unsigned long) frame;
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+	regs->d0 = sig;
+	regs->d1 = (long) &frame->info;
+
+	set_fs(USER_DS);
+
+	/* the tracer may want to single-step inside the handler */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		ptrace_notify(SIGTRAP);
+
+#if DEBUG_SIG
+	printk(KERN_DEBUG "SIG deliver %d (%s:%d): sp=%p pc=%lx ra=%p\n",
+	       sig, current->comm, current->pid, frame, regs->pc,
+	       frame->pretcode);
+#endif
+
+	return 0;
+
+give_sigsegv:
+	force_sig(SIGSEGV, current);
+	return -EFAULT;
+}
+
+/*
+ * handle the actual delivery of a signal to userspace
+ */
+static int handle_signal(int sig,
+			 siginfo_t *info, struct k_sigaction *ka,
+			 sigset_t *oldset, struct pt_regs *regs)
+{
+	int ret;
+
+	/* Are we from a system call? */
+	if (regs->orig_d0 >= 0) {
+		/* If so, check system call restarting.. */
+		switch (regs->d0) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			regs->d0 = -EINTR;
+			break;
+
+		case -ERESTARTSYS:
+			if (!(ka->sa.sa_flags & SA_RESTART)) {
+				regs->d0 = -EINTR;
+				break;
+			}
+
+			/* fallthrough */
+		case -ERESTARTNOINTR:
+			regs->d0 = regs->orig_d0;
+			regs->pc -= 2;
+		}
+	}
+
+	/* Set up the stack frame */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		ret = setup_rt_frame(sig, ka, info, oldset, regs);
+	else
+		ret = setup_frame(sig, ka, oldset, regs);
+
+	if (ret == 0) {
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked, &current->blocked,
+			  &ka->sa.sa_mask);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked, sig);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+
+	return ret;
+}
+
+/*
+ * handle a potential signal
+ */
+static void do_signal(struct pt_regs *regs)
+{
+	struct k_sigaction ka;
+	siginfo_t info;
+	sigset_t *oldset;
+	int signr;
+
+	/* we want the common case to go fast, which is why we may in certain
+	 * cases get here from kernel mode */
+	if (!user_mode(regs))
+		return;
+
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+	if (signr > 0) {
+		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+			/* a signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TIF_RESTORE_SIGMASK flag */
+			if (test_thread_flag(TIF_RESTORE_SIGMASK))
+				clear_thread_flag(TIF_RESTORE_SIGMASK);
+		}
+
+		return;
+	}
+
+	/* did we come from a system call? */
+	if (regs->orig_d0 >= 0) {
+		/* restart the system call - no handlers present */
+		switch (regs->d0) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->d0 = regs->orig_d0;
+			regs->pc -= 2;
+			break;
+
+		case -ERESTART_RESTARTBLOCK:
+			regs->d0 = __NR_restart_syscall;
+			regs->pc -= 2;
+			break;
+		}
+	}
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
+}
+
+/*
+ * notification of userspace execution resumption
+ * - triggered by current->work.notify_resume
+ */
+asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
+{
+	/* Pending single-step? */
+	if (thread_info_flags & _TIF_SINGLESTEP) {
+#ifndef CONFIG_MN10300_USING_JTAG
+		regs->epsw |= EPSW_T;
+		clear_thread_flag(TIF_SINGLESTEP);
+#else
+		BUG(); /* no h/w single-step if using JTAG unit */
+#endif
+	}
+
+	/* deal with pending signal delivery */
+	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
+		do_signal(regs);
+}
diff --git a/arch/mn10300/kernel/switch_to.S b/arch/mn10300/kernel/switch_to.S
new file mode 100644
index 000000000000..630aad71b946
--- /dev/null
+++ b/arch/mn10300/kernel/switch_to.S
@@ -0,0 +1,71 @@
+###############################################################################
+#
+# MN10300 Context switch operation
+#
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/cpu-regs.h>
+
+	.text
+
+###############################################################################
+#
+# struct task_struct *__switch_to(struct thread_struct *prev,
+#				  struct thread_struct *next,
+#				  struct task_struct *prev_task)
+#
+###############################################################################
+ENTRY(__switch_to)
+	movm	[d2,d3,a2,a3,exreg1],(sp)
+	or	EPSW_NMID,epsw
+
+	mov	(44,sp),d2
+
+	mov	d0,a0
+	mov	d1,a1
+
+	# save prev context
+	mov	(__frame),d0
+	mov	d0,(THREAD_FRAME,a0)
+	mov	__switch_back,d0
+	mov	d0,(THREAD_PC,a0)
+	mov	sp,a2
+	mov	a2,(THREAD_SP,a0)
+	mov	a3,(THREAD_A3,a0)
+
+	mov	(THREAD_A3,a1),a3
+	mov	(THREAD_SP,a1),a2
+
+	# switch
+	mov	a2,sp
+
+	# load next context
+	GET_THREAD_INFO a2
+	mov	a2,(__current_ti)
+	mov	(TI_task,a2),a2
+	mov	a2,(__current)
+#ifdef CONFIG_MN10300_CURRENT_IN_E2
+	mov	a2,e2
+#endif
+
+	mov	(THREAD_FRAME,a1),a2
+	mov	a2,(__frame)
+	mov	(THREAD_PC,a1),a2
+	mov	d2,d0			# for ret_from_fork
+	mov	d0,a0			# for __switch_to
+
+	jmp	(a2)
+
+__switch_back:
+	and	~EPSW_NMID,epsw
+	ret	[d2,d3,a2,a3,exreg1],32
diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c
new file mode 100644
index 000000000000..5f17a1ebc825
--- /dev/null
+++ b/arch/mn10300/kernel/sys_mn10300.c
@@ -0,0 +1,193 @@
+/* MN10300 Weird system calls
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/syscalls.h>
+#include <linux/tty.h>
+
+#include <asm/uaccess.h>
+
+#define MIN_MAP_ADDR	PAGE_SIZE	/* minimum fixed mmap address */
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way Unix traditionally does this, though.
+ */
+asmlinkage long sys_pipe(unsigned long __user *fildes)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe(fd);
+	if (!error) {
+		if (copy_to_user(fildes, fd, 2 * sizeof(int)))
+			error = -EFAULT;
+	}
+	return error;
+}
+
+/*
+ * memory mapping syscall
+ */
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, unsigned long pgoff)
+{
+	struct file *file = NULL;
+	long error = -EINVAL;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	if (flags & MAP_FIXED && addr < MIN_MAP_ADDR)
+		goto out;
+
+	error = -EBADF;
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			goto out;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return error;
+}
+
+asmlinkage long old_mmap(unsigned long addr, unsigned long len,
+			 unsigned long prot, unsigned long flags,
+			 unsigned long fd, unsigned long offset)
+{
+	if (offset & ~PAGE_MASK)
+		return -EINVAL;
+	return sys_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
+}
+
+struct sel_arg_struct {
+	unsigned long n;
+	fd_set *inp;
+	fd_set *outp;
+	fd_set *exp;
+	struct timeval *tvp;
+};
+
+asmlinkage int old_select(struct sel_arg_struct __user *arg)
+{
+	struct sel_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	/* sys_select() does the appropriate kernel locking */
+	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ */
+asmlinkage long sys_ipc(uint call, int first, int second,
+			int third, void __user *ptr, long fifth)
+{
+	int version, ret;
+
+	version = call >> 16; /* hack for backward compatibility */
+	call &= 0xffff;
+
+	switch (call) {
+	case SEMOP:
+		return sys_semtimedop(first, (struct sembuf __user *)ptr,
+				      second, NULL);
+	case SEMTIMEDOP:
+		return sys_semtimedop(first, (struct sembuf __user *)ptr,
+				      second,
+				      (const struct timespec __user *)fifth);
+	case SEMGET:
+		return sys_semget(first, second, third);
+	case SEMCTL: {
+		union semun fourth;
+		if (!ptr)
+			return -EINVAL;
+		if (get_user(fourth.__pad, (void __user * __user *) ptr))
+			return -EFAULT;
+		return sys_semctl(first, second, third, fourth);
+	}
+
+	case MSGSND:
+		return sys_msgsnd(first, (struct msgbuf __user *) ptr,
+				  second, third);
+	case MSGRCV:
+		switch (version) {
+		case 0: {
+			struct ipc_kludge tmp;
+			if (!ptr)
+				return -EINVAL;
+
+			if (copy_from_user(&tmp,
+					   (struct ipc_kludge __user *) ptr,
+					   sizeof(tmp)))
+				return -EFAULT;
+			return sys_msgrcv(first, tmp.msgp, second,
+					  tmp.msgtyp, third);
+		}
+		default:
+			return sys_msgrcv(first,
+					  (struct msgbuf __user *) ptr,
+					   second, fifth, third);
+		}
+	case MSGGET:
+		return sys_msgget((key_t) first, second);
+	case MSGCTL:
+		return sys_msgctl(first, second,
+				   (struct msqid_ds __user *) ptr);
+
+	case SHMAT:
+		switch (version) {
+		default: {
+			ulong raddr;
+			ret = do_shmat(first, (char __user *) ptr, second,
+				       &raddr);
+			if (ret)
+				return ret;
+			return put_user(raddr, (ulong *) third);
+		}
+		case 1:	/* iBCS2 emulator entry point */
+			if (!segment_eq(get_fs(), get_ds()))
+				return -EINVAL;
+			return do_shmat(first, (char __user *) ptr, second,
+					(ulong *) third);
+		}
+	case SHMDT:
+		return sys_shmdt((char __user *)ptr);
+	case SHMGET:
+		return sys_shmget(first, second, third);
+	case SHMCTL:
+		return sys_shmctl(first, second,
+				  (struct shmid_ds __user *) ptr);
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
new file mode 100644
index 000000000000..ff492e3b3457
--- /dev/null
+++ b/arch/mn10300/kernel/time.c
@@ -0,0 +1,129 @@
+/* MN10300 Low level time management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from arch/i386/kernel/time.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/profile.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+#include <asm/processor.h>
+#include <asm/intctl-regs.h>
+#include <asm/rtc.h>
+
+#ifdef CONFIG_MN10300_RTC
+unsigned long mn10300_ioclk;		/* system I/O clock frequency */
+unsigned long mn10300_iobclk;		/* system I/O clock frequency */
+unsigned long mn10300_tsc_per_HZ;	/* number of ioclks per jiffy */
+#endif /* CONFIG_MN10300_RTC */
+
+static unsigned long mn10300_last_tsc;	/* time-stamp counter at last time
+					 * interrupt occurred */
+
+static irqreturn_t timer_interrupt(int irq, void *dev_id);
+
+static struct irqaction timer_irq = {
+	.handler	= timer_interrupt,
+	.flags		= IRQF_DISABLED | IRQF_SHARED | IRQF_TIMER,
+	.mask		= CPU_MASK_NONE,
+	.name		= "timer",
+};
+
+/*
+ * scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+	union {
+		unsigned long long l;
+		u32 w[2];
+	} quot;
+
+	quot.w[0] = mn10300_last_tsc - get_cycles();
+	quot.w[1] = 1000000000;
+
+	asm("mulu %2,%3,%0,%1"
+	    : "=r"(quot.w[1]), "=r"(quot.w[0])
+	    : "0"(quot.w[1]), "1"(quot.w[0])
+	    : "cc");
+
+	do_div(quot.l, MN10300_TSCCLK);
+
+	return quot.l;
+}
+
+/*
+ * advance the kernel's time keeping clocks (xtime and jiffies)
+ * - we use Timer 0 & 1 cascaded as a clock to nudge us the next time
+ *   there's a need to update
+ */
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
+{
+	unsigned tsc, elapse;
+
+	write_seqlock(&xtime_lock);
+
+	while (tsc = get_cycles(),
+	       elapse = mn10300_last_tsc - tsc, /* time elapsed since last
+						 * tick */
+	       elapse > MN10300_TSC_PER_HZ
+	       ) {
+		mn10300_last_tsc -= MN10300_TSC_PER_HZ;
+
+		/* advance the kernel's time tracking system */
+		profile_tick(CPU_PROFILING);
+		do_timer(1);
+		update_process_times(user_mode(get_irq_regs()));
+		check_rtc_time();
+	}
+
+	write_sequnlock(&xtime_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * initialise the various timers used by the main part of the kernel
+ */
+void __init time_init(void)
+{
+	/* we need the prescalar running to be able to use IOCLK/8
+	 * - IOCLK runs at 1/4 (ST5 open) or 1/8 (ST5 closed) internal CPU clock
+	 * - IOCLK runs at Fosc rate (crystal speed)
+	 */
+	TMPSCNT |= TMPSCNT_ENABLE;
+
+	startup_timestamp_counter();
+
+	printk(KERN_INFO
+	       "timestamp counter I/O clock running at %lu.%02lu"
+	       " (calibrated against RTC)\n",
+	       MN10300_TSCCLK / 1000000, (MN10300_TSCCLK / 10000) % 100);
+
+	xtime.tv_sec = get_initial_rtc_time();
+	xtime.tv_nsec = 0;
+
+	mn10300_last_tsc = TMTSCBC;
+
+	/* use timer 0 & 1 cascaded to tick at as close to HZ as possible */
+	setup_irq(TMJCIRQ, &timer_irq);
+
+	set_intr_level(TMJCIRQ, TMJCICR_LEVEL);
+
+	startup_jiffies_counter();
+
+#ifdef CONFIG_MN10300_WD_TIMER
+	/* start the watchdog timer */
+	watchdog_go();
+#endif
+}
diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c
new file mode 100644
index 000000000000..8b9dc6d9dcc6
--- /dev/null
+++ b/arch/mn10300/kernel/traps.c
@@ -0,0 +1,619 @@
+/* MN10300 Exception handling
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/pci.h>
+#include <linux/kdebug.h>
+#include <linux/bug.h>
+#include <linux/irq.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu-regs.h>
+#include <asm/busctl-regs.h>
+#include <asm/unit/leds.h>
+#include <asm/fpu.h>
+#include <asm/gdb-stub.h>
+#include <asm/sections.h>
+
+#if (CONFIG_INTERRUPT_VECTOR_BASE & 0xffffff)
+#error "INTERRUPT_VECTOR_BASE not aligned to 16MiB boundary!"
+#endif
+
+struct pt_regs *__frame; /* current frame pointer */
+EXPORT_SYMBOL(__frame);
+
+int kstack_depth_to_print = 24;
+
+spinlock_t die_lock = __SPIN_LOCK_UNLOCKED(die_lock);
+
+ATOMIC_NOTIFIER_HEAD(mn10300_die_chain);
+
+/*
+ * These constants are for searching for possible module text
+ * segments. MODULE_RANGE is a guess of how much space is likely
+ * to be vmalloced.
+ */
+#define MODULE_RANGE (8 * 1024 * 1024)
+
+#define DO_ERROR(signr, prologue, str, name)			\
+asmlinkage void name(struct pt_regs *regs, u32 intcode)		\
+{								\
+	prologue;						\
+	if (die_if_no_fixup(str, regs, intcode))		\
+		return;						\
+	force_sig(signr, current);				\
+}
+
+#define DO_EINFO(signr, prologue, str, name, sicode)			\
+asmlinkage void name(struct pt_regs *regs, u32 intcode)			\
+{									\
+	siginfo_t info;							\
+	prologue;							\
+	if (die_if_no_fixup(str, regs, intcode))			\
+		return;							\
+	info.si_signo = signr;						\
+	if (signr == SIGILL && sicode == ILL_ILLOPC) {			\
+		uint8_t opcode;						\
+		if (get_user(opcode, (uint8_t __user *)regs->pc) == 0)	\
+			if (opcode == 0xff)				\
+				info.si_signo = SIGTRAP;		\
+	}								\
+	info.si_errno = 0;						\
+	info.si_code = sicode;						\
+	info.si_addr = (void *) regs->pc;				\
+	force_sig_info(info.si_signo, &info, current);			\
+}
+
+DO_ERROR(SIGTRAP, {}, "trap",			trap);
+DO_ERROR(SIGSEGV, {}, "ibreak",			ibreak);
+DO_ERROR(SIGSEGV, {}, "obreak",			obreak);
+DO_EINFO(SIGSEGV, {}, "access error",		access_error,	SEGV_ACCERR);
+DO_EINFO(SIGSEGV, {}, "insn access error",	insn_acc_error,	SEGV_ACCERR);
+DO_EINFO(SIGSEGV, {}, "data access error",	data_acc_error,	SEGV_ACCERR);
+DO_EINFO(SIGILL,  {}, "privileged opcode",	priv_op,	ILL_PRVOPC);
+DO_EINFO(SIGILL,  {}, "invalid opcode",		invalid_op,	ILL_ILLOPC);
+DO_EINFO(SIGILL,  {}, "invalid ex opcode",	invalid_exop,	ILL_ILLOPC);
+DO_EINFO(SIGBUS,  {}, "invalid address",	mem_error,	BUS_ADRERR);
+DO_EINFO(SIGBUS,  {}, "bus error",		bus_error,	BUS_ADRERR);
+DO_EINFO(SIGILL,  {}, "FPU invalid opcode", 	fpu_invalid_op,	ILL_COPROC);
+
+DO_ERROR(SIGTRAP,
+#ifndef CONFIG_MN10300_USING_JTAG
+	 DCR &= ~0x0001,
+#else
+	 {},
+#endif
+	 "single step", istep);
+
+/*
+ * handle NMI
+ */
+asmlinkage void nmi(struct pt_regs *regs, enum exception_code code)
+{
+	/* see if gdbstub wants to deal with it */
+#ifdef CONFIG_GDBSTUB
+	if (gdbstub_intercept(regs, code))
+		return;
+#endif
+
+	printk(KERN_WARNING "--- Register Dump ---\n");
+	show_registers(regs);
+	printk(KERN_WARNING "---------------------\n");
+}
+
+/*
+ * show a stack trace from the specified stack pointer
+ */
+void show_trace(unsigned long *sp)
+{
+	unsigned long *stack, addr, module_start, module_end;
+	int i;
+
+	printk(KERN_EMERG "\n"
+	       KERN_EMERG "Call Trace:");
+
+	stack = sp;
+	i = 0;
+	module_start = VMALLOC_START;
+	module_end = VMALLOC_END;
+
+	while (((long) stack & (THREAD_SIZE - 1)) != 0) {
+		addr = *stack++;
+		if (__kernel_text_address(addr)) {
+#if 1
+			printk(" [<%08lx>]", addr);
+			print_symbol(" %s", addr);
+			printk("\n");
+#else
+			if ((i % 6) == 0)
+				printk("\n" KERN_EMERG "  ");
+			printk("[<%08lx>] ", addr);
+			i++;
+#endif
+		}
+	}
+
+	printk("\n");
+}
+
+/*
+ * show the raw stack from the specified stack pointer
+ */
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+	unsigned long *stack;
+	int i;
+
+	if (!sp)
+		sp = (unsigned long *) &sp;
+
+	stack = sp;
+	printk(KERN_EMERG "Stack:");
+	for (i = 0; i < kstack_depth_to_print; i++) {
+		if (((long) stack & (THREAD_SIZE - 1)) == 0)
+			break;
+		if ((i % 8) == 0)
+			printk("\n" KERN_EMERG "  ");
+		printk("%08lx ", *stack++);
+	}
+
+	show_trace(sp);
+}
+
+/*
+ * the architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+	unsigned long stack;
+
+	show_stack(current, &stack);
+}
+EXPORT_SYMBOL(dump_stack);
+
+/*
+ * dump the register file in the specified exception frame
+ */
+void show_registers_only(struct pt_regs *regs)
+{
+	unsigned long ssp;
+
+	ssp = (unsigned long) regs + sizeof(*regs);
+
+	printk(KERN_EMERG "PC:  %08lx EPSW:  %08lx  SSP: %08lx mode: %s\n",
+	       regs->pc, regs->epsw, ssp, user_mode(regs) ? "User" : "Super");
+	printk(KERN_EMERG "d0:  %08lx   d1:  %08lx   d2: %08lx   d3: %08lx\n",
+	       regs->d0, regs->d1, regs->d2, regs->d3);
+	printk(KERN_EMERG "a0:  %08lx   a1:  %08lx   a2: %08lx   a3: %08lx\n",
+	       regs->a0, regs->a1, regs->a2, regs->a3);
+	printk(KERN_EMERG "e0:  %08lx   e1:  %08lx   e2: %08lx   e3: %08lx\n",
+	       regs->e0, regs->e1, regs->e2, regs->e3);
+	printk(KERN_EMERG "e4:  %08lx   e5:  %08lx   e6: %08lx   e7: %08lx\n",
+	       regs->e4, regs->e5, regs->e6, regs->e7);
+	printk(KERN_EMERG "lar: %08lx   lir: %08lx  mdr: %08lx  usp: %08lx\n",
+	       regs->lar, regs->lir, regs->mdr, regs->sp);
+	printk(KERN_EMERG "cvf: %08lx   crl: %08lx  crh: %08lx  drq: %08lx\n",
+	       regs->mcvf, regs->mcrl, regs->mcrh, regs->mdrq);
+	printk(KERN_EMERG "threadinfo=%p task=%p)\n",
+	       current_thread_info(), current);
+
+	if ((unsigned long) current >= 0x90000000UL &&
+	    (unsigned long) current < 0x94000000UL)
+		printk(KERN_EMERG "Process %s (pid: %d)\n",
+		       current->comm, current->pid);
+
+	printk(KERN_EMERG "CPUP:   %04hx\n", CPUP);
+	printk(KERN_EMERG "TBR:    %08x\n", TBR);
+	printk(KERN_EMERG "DEAR:   %08x\n", DEAR);
+	printk(KERN_EMERG "sISR:   %08x\n", sISR);
+	printk(KERN_EMERG "NMICR:  %04hx\n", NMICR);
+	printk(KERN_EMERG "BCBERR: %08x\n", BCBERR);
+	printk(KERN_EMERG "BCBEAR: %08x\n", BCBEAR);
+	printk(KERN_EMERG "MMUFCR: %08x\n", MMUFCR);
+	printk(KERN_EMERG "IPTEU : %08x  IPTEL2: %08x\n", IPTEU, IPTEL2);
+	printk(KERN_EMERG "DPTEU:  %08x  DPTEL2: %08x\n", DPTEU, DPTEL2);
+}
+
+/*
+ * dump the registers and the stack
+ */
+void show_registers(struct pt_regs *regs)
+{
+	unsigned long sp;
+	int i;
+
+	show_registers_only(regs);
+
+	if (!user_mode(regs))
+		sp = (unsigned long) regs + sizeof(*regs);
+	else
+		sp = regs->sp;
+
+	/* when in-kernel, we also print out the stack and code at the
+	 * time of the fault..
+	 */
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "\n");
+		show_stack(current, (unsigned long *) sp);
+
+#if 0
+		printk(KERN_EMERG "\n"
+		       KERN_EMERG "Code: ");
+		if (regs->pc < PAGE_OFFSET)
+			goto bad;
+
+		for (i = 0; i < 20; i++) {
+			unsigned char c;
+			if (__get_user(c, &((unsigned char *) regs->pc)[i]))
+				goto bad;
+			printk("%02x ", c);
+		}
+#else
+		i = 0;
+#endif
+	}
+
+	printk("\n");
+	return;
+
+#if 0
+bad:
+	printk(KERN_EMERG " Bad PC value.");
+	break;
+#endif
+}
+
+/*
+ *
+ */
+void show_trace_task(struct task_struct *tsk)
+{
+	unsigned long sp = tsk->thread.sp;
+
+	/* User space on another CPU? */
+	if ((sp ^ (unsigned long) tsk) & (PAGE_MASK << 1))
+		return;
+
+	show_trace((unsigned long *) sp);
+}
+
+/*
+ * note the untimely death of part of the kernel
+ */
+void die(const char *str, struct pt_regs *regs, enum exception_code code)
+{
+	console_verbose();
+	spin_lock_irq(&die_lock);
+	printk(KERN_EMERG "\n"
+	       KERN_EMERG "%s: %04x\n",
+	       str, code & 0xffff);
+	show_registers(regs);
+
+	if (regs->pc >= 0x02000000 && regs->pc < 0x04000000 &&
+	    (regs->epsw & (EPSW_IM | EPSW_IE)) != (EPSW_IM | EPSW_IE)) {
+		printk(KERN_EMERG "Exception in usermode interrupt handler\n");
+		printk(KERN_EMERG "\n"
+		       KERN_EMERG "  Please connect to kernel debugger !!\n");
+		asm volatile ("0: bra 0b");
+	}
+
+	spin_unlock_irq(&die_lock);
+	do_exit(SIGSEGV);
+}
+
+/*
+ * see if there's a fixup handler we can force a jump to when an exception
+ * happens due to something kernel code did
+ */
+int die_if_no_fixup(const char *str, struct pt_regs *regs,
+		    enum exception_code code)
+{
+	if (user_mode(regs))
+		return 0;
+
+	peripheral_leds_display_exception(code);
+
+	switch (code) {
+		/* see if we can fixup the kernel accessing memory */
+	case EXCEP_ITLBMISS:
+	case EXCEP_DTLBMISS:
+	case EXCEP_IAERROR:
+	case EXCEP_DAERROR:
+	case EXCEP_MEMERR:
+	case EXCEP_MISALIGN:
+	case EXCEP_BUSERROR:
+	case EXCEP_ILLDATACC:
+	case EXCEP_IOINSACC:
+	case EXCEP_PRIVINSACC:
+	case EXCEP_PRIVDATACC:
+	case EXCEP_DATINSACC:
+		if (fixup_exception(regs))
+			return 1;
+	case EXCEP_UNIMPINS:
+		if (regs->pc && *(uint8_t *)regs->pc == 0xff)
+			if (notify_die(DIE_BREAKPOINT, str, regs, code, 0, 0))
+				return 1;
+		break;
+	default:
+		break;
+	}
+
+	/* see if gdbstub wants to deal with it */
+#ifdef CONFIG_GDBSTUB
+	if (gdbstub_intercept(regs, code))
+		return 1;
+#endif
+
+	if (notify_die(DIE_GPF, str, regs, code, 0, 0))
+		return 1;
+
+	/* make the process die as the last resort */
+	die(str, regs, code);
+}
+
+/*
+ * handle unsupported syscall instructions (syscall 1-15)
+ */
+static asmlinkage void unsupported_syscall(struct pt_regs *regs,
+					   enum exception_code code)
+{
+	struct task_struct *tsk = current;
+	siginfo_t info;
+
+	/* catch a kernel BUG() */
+	if (code == EXCEP_SYSCALL15 && !user_mode(regs)) {
+		if (report_bug(regs->pc, regs) == BUG_TRAP_TYPE_BUG) {
+#ifdef CONFIG_GDBSTUB
+			__gdbstub_bug_trap();
+#endif
+		}
+	}
+
+	regs->pc -= 2; /* syscall return addr is _after_ the instruction */
+
+	die_if_no_fixup("An unsupported syscall insn was used by the kernel\n",
+			regs, code);
+
+	info.si_signo	= SIGILL;
+	info.si_errno	= ENOSYS;
+	info.si_code	= ILL_ILLTRP;
+	info.si_addr	= (void *) regs->pc;
+	force_sig_info(SIGILL, &info, tsk);
+}
+
+/*
+ * display the register file when the stack pointer gets clobbered
+ */
+asmlinkage void do_double_fault(struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+
+	strcpy(tsk->comm, "emergency tsk");
+	tsk->pid = 0;
+	console_verbose();
+	printk(KERN_EMERG "--- double fault ---\n");
+	show_registers(regs);
+}
+
+/*
+ * asynchronous bus error (external, usually I/O DMA)
+ */
+asmlinkage void io_bus_error(u32 bcberr, u32 bcbear, struct pt_regs *regs)
+{
+	console_verbose();
+
+	printk(KERN_EMERG "\n"
+	       KERN_EMERG "Asynchronous I/O Bus Error\n"
+	       KERN_EMERG "==========================\n");
+
+	if (bcberr & BCBERR_BEME)
+		printk(KERN_EMERG "- Multiple recorded errors\n");
+
+	printk(KERN_EMERG "- Faulting Buses:%s%s%s\n",
+	       bcberr & BCBERR_BEMR_CI  ? " CPU-Ins-Fetch" : "",
+	       bcberr & BCBERR_BEMR_CD  ? " CPU-Data" : "",
+	       bcberr & BCBERR_BEMR_DMA ? " DMA" : "");
+
+	printk(KERN_EMERG "- %s %s access made to %s at address %08x\n",
+	       bcberr &	BCBERR_BEBST ? "Burst" : "Single",
+	       bcberr &	BCBERR_BERW ? "Read" : "Write",
+	       bcberr &	BCBERR_BESB_MON  ? "Monitor Space" :
+	       bcberr &	BCBERR_BESB_IO   ? "Internal CPU I/O Space" :
+	       bcberr &	BCBERR_BESB_EX   ? "External I/O Bus" :
+	       bcberr &	BCBERR_BESB_OPEX ? "External Memory Bus" :
+	       "On Chip Memory",
+	       bcbear
+	       );
+
+	printk(KERN_EMERG "- Detected by the %s\n",
+	       bcberr&BCBERR_BESD ? "Bus Control Unit" : "Slave Bus");
+
+#ifdef CONFIG_PCI
+#define BRIDGEREGB(X) (*(volatile __u8  *)(0xBE040000 + (X)))
+#define BRIDGEREGW(X) (*(volatile __u16 *)(0xBE040000 + (X)))
+#define BRIDGEREGL(X) (*(volatile __u32 *)(0xBE040000 + (X)))
+
+	printk(KERN_EMERG "- PCI Memory Paging Reg:         %08x\n",
+	       *(volatile __u32 *) (0xBFFFFFF4));
+	printk(KERN_EMERG "- PCI Bridge Base Address 0:     %08x\n",
+	       BRIDGEREGL(PCI_BASE_ADDRESS_0));
+	printk(KERN_EMERG "- PCI Bridge AMPCI Base Address: %08x\n",
+	       BRIDGEREGL(0x48));
+	printk(KERN_EMERG "- PCI Bridge Command:                %04hx\n",
+	       BRIDGEREGW(PCI_COMMAND));
+	printk(KERN_EMERG "- PCI Bridge Status:                 %04hx\n",
+	       BRIDGEREGW(PCI_STATUS));
+	printk(KERN_EMERG "- PCI Bridge Int Status:         %08hx\n",
+	       BRIDGEREGL(0x4c));
+#endif
+
+	printk(KERN_EMERG "\n");
+	show_registers(regs);
+
+	panic("Halted due to asynchronous I/O Bus Error\n");
+}
+
+/*
+ * handle an exception for which a handler has not yet been installed
+ */
+asmlinkage void uninitialised_exception(struct pt_regs *regs,
+					enum exception_code code)
+{
+
+	/* see if gdbstub wants to deal with it */
+#ifdef CONFIG_GDBSTUB
+	if (gdbstub_intercept(regs, code))
+		return;
+#endif
+
+	peripheral_leds_display_exception(code);
+	printk(KERN_EMERG "Uninitialised Exception 0x%04x\n", code & 0xFFFF);
+	show_registers(regs);
+
+	for (;;)
+		continue;
+}
+
+/*
+ * set an interrupt stub to jump to a handler
+ * ! NOTE: this does *not* flush the caches
+ */
+void __init __set_intr_stub(enum exception_code code, void *handler)
+{
+	unsigned long addr;
+	u8 *vector = (u8 *)(CONFIG_INTERRUPT_VECTOR_BASE + code);
+
+	addr = (unsigned long) handler - (unsigned long) vector;
+	vector[0] = 0xdc;		/* JMP handler */
+	vector[1] = addr;
+	vector[2] = addr >> 8;
+	vector[3] = addr >> 16;
+	vector[4] = addr >> 24;
+	vector[5] = 0xcb;
+	vector[6] = 0xcb;
+	vector[7] = 0xcb;
+}
+
+/*
+ * set an interrupt stub to jump to a handler
+ */
+void __init set_intr_stub(enum exception_code code, void *handler)
+{
+	unsigned long addr;
+	u8 *vector = (u8 *)(CONFIG_INTERRUPT_VECTOR_BASE + code);
+
+	addr = (unsigned long) handler - (unsigned long) vector;
+	vector[0] = 0xdc;		/* JMP handler */
+	vector[1] = addr;
+	vector[2] = addr >> 8;
+	vector[3] = addr >> 16;
+	vector[4] = addr >> 24;
+	vector[5] = 0xcb;
+	vector[6] = 0xcb;
+	vector[7] = 0xcb;
+
+	mn10300_dcache_flush_inv();
+	mn10300_icache_inv();
+}
+
+/*
+ * set an interrupt stub to invoke the JTAG unit and then jump to a handler
+ */
+void __init set_jtag_stub(enum exception_code code, void *handler)
+{
+	unsigned long addr;
+	u8 *vector = (u8 *)(CONFIG_INTERRUPT_VECTOR_BASE + code);
+
+	addr = (unsigned long) handler - ((unsigned long) vector + 1);
+	vector[0] = 0xff;		/* PI to jump into JTAG debugger */
+	vector[1] = 0xdc;		/* jmp handler */
+	vector[2] = addr;
+	vector[3] = addr >> 8;
+	vector[4] = addr >> 16;
+	vector[5] = addr >> 24;
+	vector[6] = 0xcb;
+	vector[7] = 0xcb;
+
+	mn10300_dcache_flush_inv();
+	flush_icache_range((unsigned long) vector, (unsigned long) vector + 8);
+}
+
+/*
+ * initialise the exception table
+ */
+void __init trap_init(void)
+{
+	set_excp_vector(EXCEP_TRAP,		trap);
+	set_excp_vector(EXCEP_ISTEP,		istep);
+	set_excp_vector(EXCEP_IBREAK,		ibreak);
+	set_excp_vector(EXCEP_OBREAK,		obreak);
+
+	set_excp_vector(EXCEP_PRIVINS,		priv_op);
+	set_excp_vector(EXCEP_UNIMPINS,		invalid_op);
+	set_excp_vector(EXCEP_UNIMPEXINS,	invalid_exop);
+	set_excp_vector(EXCEP_MEMERR,		mem_error);
+	set_excp_vector(EXCEP_MISALIGN,		misalignment);
+	set_excp_vector(EXCEP_BUSERROR,		bus_error);
+	set_excp_vector(EXCEP_ILLINSACC,	insn_acc_error);
+	set_excp_vector(EXCEP_ILLDATACC,	data_acc_error);
+	set_excp_vector(EXCEP_IOINSACC,		insn_acc_error);
+	set_excp_vector(EXCEP_PRIVINSACC,	insn_acc_error);
+	set_excp_vector(EXCEP_PRIVDATACC,	data_acc_error);
+	set_excp_vector(EXCEP_DATINSACC,	insn_acc_error);
+	set_excp_vector(EXCEP_FPU_DISABLED,	fpu_disabled);
+	set_excp_vector(EXCEP_FPU_UNIMPINS,	fpu_invalid_op);
+	set_excp_vector(EXCEP_FPU_OPERATION,	fpu_exception);
+
+	set_excp_vector(EXCEP_NMI,		nmi);
+
+	set_excp_vector(EXCEP_SYSCALL1,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL2,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL3,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL4,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL5,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL6,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL7,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL8,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL9,		unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL10,	unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL11,	unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL12,	unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL13,	unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL14,	unsupported_syscall);
+	set_excp_vector(EXCEP_SYSCALL15,	unsupported_syscall);
+}
+
+/*
+ * determine if a program counter value is a valid bug address
+ */
+int is_valid_bugaddr(unsigned long pc)
+{
+	return pc >= PAGE_OFFSET;
+}
diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S
new file mode 100644
index 000000000000..a3e80f444f55
--- /dev/null
+++ b/arch/mn10300/kernel/vmlinux.lds.S
@@ -0,0 +1,159 @@
+/* MN10300 Main kernel linker script
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#define __VMLINUX_LDS__
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
+
+OUTPUT_FORMAT("elf32-am33lin", "elf32-am33lin", "elf32-am33lin")
+OUTPUT_ARCH(mn10300)
+ENTRY(_start)
+jiffies = jiffies_64;
+#ifndef CONFIG_MN10300_CURRENT_IN_E2
+current = __current;
+#endif
+SECTIONS
+{
+  . = CONFIG_KERNEL_TEXT_ADDRESS;
+  /* read-only */
+  _stext = .;
+  _text = .;			/* Text and read-only data */
+  .text : {
+	*(
+	.text.head
+	.text
+	)
+	TEXT_TEXT
+	SCHED_TEXT
+	LOCK_TEXT
+	KPROBES_TEXT
+	*(.fixup)
+	*(.gnu.warning)
+	} = 0xcb
+
+  _etext = .;			/* End of text section */
+
+  . = ALIGN(16);		/* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  BUG_TABLE
+
+  RODATA
+
+  /* writeable */
+  .data : {			/* Data */
+	DATA_DATA
+	CONSTRUCTORS
+	}
+
+  . = ALIGN(4096);
+  __nosave_begin = .;
+  .data_nosave : { *(.data.nosave) }
+  . = ALIGN(4096);
+  __nosave_end = .;
+
+  . = ALIGN(4096);
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  /* rarely changed data like cpu maps */
+  . = ALIGN(32);
+  .data.read_mostly : AT(ADDR(.data.read_mostly)) {
+	*(.data.read_mostly)
+	_edata = .;		/* End of data section */
+  }
+
+  . = ALIGN(THREAD_SIZE);	/* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  /* might get freed after init */
+  . = ALIGN(4096);
+  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+  	__smp_locks = .;
+	*(.smp_locks)
+	__smp_locks_end = .;
+  }
+
+  /* will be freed after init */
+  . = ALIGN(4096);		/* Init code and data */
+  __init_begin = .;
+  .init.text : {
+	_sinittext = .;
+	*(.init.text)
+	_einittext = .;
+  }
+  .init.data : { *(.init.data) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { KEEP(*(.init.setup)) }
+  __setup_end = .;
+
+  __initcall_start = .;
+  .initcall.init : {
+	INITCALLS
+  }
+  __initcall_end = .;
+  __con_initcall_start = .;
+  .con_initcall.init : { *(.con_initcall.init) }
+  __con_initcall_end = .;
+
+  SECURITY_INIT
+  . = ALIGN(4);
+  __alt_instructions = .;
+  .altinstructions : { *(.altinstructions) }
+  __alt_instructions_end = .;
+ .altinstr_replacement : { *(.altinstr_replacement) }
+  /* .exit.text is discard at runtime, not link time, to deal with references
+     from .altinstructions and .eh_frame */
+  .exit.text : { *(.exit.text) }
+  .exit.data : { *(.exit.data) }
+
+#ifdef CONFIG_BLK_DEV_INITRD
+  . = ALIGN(4096);
+  __initramfs_start = .;
+  .init.ramfs : { *(.init.ramfs) }
+  __initramfs_end = .;
+#endif
+
+  . = ALIGN(32);
+  __per_cpu_start = .;
+  .data.percpu  : { *(.data.percpu) }
+  __per_cpu_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+  /* freed after init ends here */
+
+  __bss_start = .;		/* BSS */
+  .bss : {
+	*(.bss.page_aligned)
+	*(.bss)
+  }
+  . = ALIGN(4);
+  __bss_stop = .;
+
+  _end = . ;
+
+  /* This is where the kernel creates the early boot page tables */
+  . = ALIGN(4096);
+  pg0 = .;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+	*(.exitcall.exit)
+	}
+
+  STABS_DEBUG
+
+  DWARF_DEBUG
+}
diff --git a/arch/mn10300/lib/Makefile b/arch/mn10300/lib/Makefile
new file mode 100644
index 000000000000..fdfa9ec5b5bb
--- /dev/null
+++ b/arch/mn10300/lib/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the MN10300-specific library files..
+#
+
+lib-y = delay.o usercopy.o checksum.o bitops.o memcpy.o memmove.o memset.o
+lib-y += do_csum.o
+lib-y += __ashldi3.o __ashrdi3.o __lshrdi3.o negdi2.o
diff --git a/arch/mn10300/lib/__ashldi3.S b/arch/mn10300/lib/__ashldi3.S
new file mode 100644
index 000000000000..a51a9506f00c
--- /dev/null
+++ b/arch/mn10300/lib/__ashldi3.S
@@ -0,0 +1,51 @@
+/* MN10300 64-bit arithmetic left shift
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/cache.h>
+
+        .text
+        .balign	L1_CACHE_BYTES
+
+###############################################################################
+#
+# unsigned long long __ashldi3(unsigned long long value [D1:D0],
+#			       unsigned by [(12,SP)])
+#
+###############################################################################
+        .globl	__ashldi3
+        .type	__ashldi3,@function
+__ashldi3:
+	mov	(12,sp),a0
+	and	+63,a0
+	beq	__ashldi3_zero
+
+	cmp	+31,a0
+	bhi	__ashldi3_32plus
+
+	# the count is in the range 1-31
+	asl	a0,d1
+
+	mov	+32,a1
+	sub	a0,a1,a1			# a1 = 32 - count
+	lsr	a1,d0,a1			# get overflow from LSW -> MSW
+
+	or_asl	a1,d1,a0,d0			# insert overflow into MSW and
+						# shift the LSW
+	rets
+
+	.balign	L1_CACHE_BYTES
+	# the count is in the range 32-63
+__ashldi3_32plus:
+	asl	a0,d0,d1
+	clr	d0
+__ashldi3_zero:
+	rets
+
+	.size	__ashldi3, .-__ashldi3
diff --git a/arch/mn10300/lib/__ashrdi3.S b/arch/mn10300/lib/__ashrdi3.S
new file mode 100644
index 000000000000..6f42382728cb
--- /dev/null
+++ b/arch/mn10300/lib/__ashrdi3.S
@@ -0,0 +1,52 @@
+/* MN10300 64-bit arithmetic right shift
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/cache.h>
+
+        .text
+        .balign	L1_CACHE_BYTES
+
+###############################################################################
+#
+# unsigned long long __ashrdi3(unsigned long long value [D1:D0],
+#			       unsigned by [(12,SP)])
+#
+###############################################################################
+        .globl	__ashrdi3
+        .type	__ashrdi3,@function
+__ashrdi3:
+	mov	(12,sp),a0
+	and	+63,a0
+	beq	__ashrdi3_zero
+
+	cmp	+31,a0
+	bhi	__ashrdi3_32plus
+
+	# the count is in the range 1-31
+	lsr	a0,d0
+
+	mov	+32,a1
+	sub	a0,a1,a1			# a1 = 32 - count
+	asl	a1,d1,a1			# get underflow from MSW -> LSW
+
+	or_asr	a1,d0,a0,d1			# insert underflow into LSW and
+						# shift the MSW
+	rets
+
+	.balign	L1_CACHE_BYTES
+	# the count is in the range 32-63
+__ashrdi3_32plus:
+	asr	a0,d1,d0
+	ext	d0				# sign-extend result through MDR
+	mov	mdr,d1
+__ashrdi3_zero:
+	rets
+
+	.size	__ashrdi3, .-__ashrdi3
diff --git a/arch/mn10300/lib/__lshrdi3.S b/arch/mn10300/lib/__lshrdi3.S
new file mode 100644
index 000000000000..a686aef31e90
--- /dev/null
+++ b/arch/mn10300/lib/__lshrdi3.S
@@ -0,0 +1,52 @@
+/* MN10300 64-bit logical right shift
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <asm/cache.h>
+
+        .text
+        .balign	L1_CACHE_BYTES
+
+###############################################################################
+#
+# unsigned long long __lshrdi3(unsigned long long value [D1:D0],
+#			       unsigned by [(12,SP)])
+#
+###############################################################################
+        .globl	__lshrdi3
+        .type	__lshrdi3,@function
+__lshrdi3:
+	mov	(12,sp),a0
+	and	+63,a0
+	beq	__lshrdi3_zero
+
+	cmp	+31,a0
+	bhi	__lshrdi3_32plus
+
+	# the count is in the range 1-31
+	lsr	a0,d0
+
+	mov	+32,a1
+	sub	a0,a1,a1			# a1 = 32 - count
+	asl	a1,d1,a1			# get underflow from MSW -> LSW
+
+	or_lsr	a1,d0,a0,d1			# insert underflow into LSW and
+						# shift the MSW
+	rets
+
+	.balign	L1_CACHE_BYTES
+	# the count is in the range 32-63
+__lshrdi3_32plus:
+	lsr	a0,d1,d0
+	clr	d1
+__lshrdi3_zero:
+	rets
+
+	.size	__lshrdi3, .-__lshrdi3
diff --git a/arch/mn10300/lib/ashrdi3.c b/arch/mn10300/lib/ashrdi3.c
new file mode 100644
index 000000000000..c54f61ddf0b5
--- /dev/null
+++ b/arch/mn10300/lib/ashrdi3.c
@@ -0,0 +1,61 @@
+/* ashrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
+/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public Licence as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public Licence for more details.
+
+You should have received a copy of the GNU General Public Licence
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#define BITS_PER_UNIT 8
+
+typedef 	 int SItype	__attribute__((mode(SI)));
+typedef unsigned int USItype	__attribute__((mode(SI)));
+typedef		 int DItype	__attribute__((mode(DI)));
+typedef		 int word_type	__attribute__((mode(__word__)));
+
+struct DIstruct {
+	SItype low;
+	SItype high;
+};
+
+union DIunion {
+	struct DIstruct	s;
+	DItype		ll;
+};
+
+DItype __ashrdi3(DItype u, word_type b)
+{
+	union DIunion w;
+	union DIunion uu;
+	word_type bm;
+
+	if (b == 0)
+		return u;
+
+	uu.ll = u;
+
+	bm = (sizeof(SItype) * BITS_PER_UNIT) - b;
+	if (bm <= 0) {
+		/* w.s.high = 1..1 or 0..0 */
+		w.s.high = uu.s.high >> (sizeof(SItype) * BITS_PER_UNIT - 1);
+		w.s.low = uu.s.high >> -bm;
+	} else {
+		USItype carries = (USItype)uu.s.high << bm;
+		w.s.high = uu.s.high >> b;
+		w.s.low = ((USItype)uu.s.low >> b) | carries;
+	}
+
+	return w.ll;
+}
diff --git a/arch/mn10300/lib/bitops.c b/arch/mn10300/lib/bitops.c
new file mode 100644
index 000000000000..440a7dcbf87b
--- /dev/null
+++ b/arch/mn10300/lib/bitops.c
@@ -0,0 +1,51 @@
+/* MN10300 Non-trivial bit operations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/bitops.h>
+#include <asm/system.h>
+
+/*
+ * try flipping a bit using BSET and BCLR
+ */
+void change_bit(int nr, volatile void *addr)
+{
+	if (test_bit(nr, addr))
+		goto try_clear_bit;
+
+try_set_bit:
+	if (!test_and_set_bit(nr, addr))
+		return;
+
+try_clear_bit:
+	if (test_and_clear_bit(nr, addr))
+		return;
+
+	goto try_set_bit;
+}
+
+/*
+ * try flipping a bit using BSET and BCLR and returning the old value
+ */
+int test_and_change_bit(int nr, volatile void *addr)
+{
+	if (test_bit(nr, addr))
+		goto try_clear_bit;
+
+try_set_bit:
+	if (!test_and_set_bit(nr, addr))
+		return 0;
+
+try_clear_bit:
+	if (test_and_clear_bit(nr, addr))
+		return 1;
+
+	goto try_set_bit;
+}
diff --git a/arch/mn10300/lib/checksum.c b/arch/mn10300/lib/checksum.c
new file mode 100644
index 000000000000..274f29ec33c1
--- /dev/null
+++ b/arch/mn10300/lib/checksum.c
@@ -0,0 +1,99 @@
+/* MN10300 Optimised checksumming wrappers
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include "internal.h"
+
+static inline unsigned short from32to16(__wsum sum)
+{
+	asm("	add	%1,%0		\n"
+	    "	addc	0xffff,%0	\n"
+	    : "=r" (sum)
+	    : "r" (sum << 16), "0" (sum & 0xffff0000)
+	    );
+	return sum >> 16;
+}
+
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	return ~do_csum(iph, ihl * 4);
+}
+EXPORT_SYMBOL(ip_fast_csum);
+
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	__wsum result;
+
+	result = do_csum(buff, len);
+	result += sum;
+	if (sum > result)
+		result++;
+	return result;
+}
+EXPORT_SYMBOL(csum_partial);
+
+__sum16 ip_compute_csum(const void *buff, int len)
+{
+	return ~from32to16(do_csum(buff, len));
+}
+EXPORT_SYMBOL(ip_compute_csum);
+
+__wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
+{
+	copy_from_user(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy);
+
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+				 int len, __wsum sum)
+{
+	sum = csum_partial(src, len, sum);
+	memcpy(dst, src, len);
+	return sum;
+}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+
+__wsum csum_partial_copy_from_user(const void *src, void *dst,
+				   int len, __wsum sum,
+				   int *err_ptr)
+{
+	int missing;
+
+	missing = copy_from_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*err_ptr = -EFAULT;
+	}
+
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+__wsum csum_and_copy_to_user(const void *src, void *dst,
+			     int len, __wsum sum,
+			     int *err_ptr)
+{
+	int missing;
+
+	missing = copy_to_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*err_ptr = -EFAULT;
+	}
+
+	return csum_partial(src, len, sum);
+}
+EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/mn10300/lib/delay.c b/arch/mn10300/lib/delay.c
new file mode 100644
index 000000000000..cce66bc0822d
--- /dev/null
+++ b/arch/mn10300/lib/delay.c
@@ -0,0 +1,50 @@
+/* MN10300 Short delay interpolation routines
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/div64.h>
+
+/*
+ * basic delay loop
+ */
+void __delay(unsigned long loops)
+{
+	int d0;
+
+	asm volatile(
+		"	bra	1f	\n"
+		"	.align	4	\n"
+		"1:	bra	2f	\n"
+		"	.align	4	\n"
+		"2:	add	-1,%0	\n"
+		"	bne	2b	\n"
+		: "=&d" (d0)
+		: "0" (loops));
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * handle a delay specified in terms of microseconds
+ */
+void __udelay(unsigned long usecs)
+{
+	signed long ioclk, stop;
+
+	/* usecs * CLK / 1E6 */
+	stop = __muldiv64u(usecs, MN10300_TSCCLK, 1000000);
+	stop = TMTSCBC - stop;
+
+	do {
+		ioclk = TMTSCBC;
+	} while (stop < ioclk);
+}
+EXPORT_SYMBOL(__udelay);
diff --git a/arch/mn10300/lib/do_csum.S b/arch/mn10300/lib/do_csum.S
new file mode 100644
index 000000000000..e138994e1667
--- /dev/null
+++ b/arch/mn10300/lib/do_csum.S
@@ -0,0 +1,162 @@
+/* Optimised simple memory checksum
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/cache.h>
+
+        .section .text
+        .balign	L1_CACHE_BYTES
+
+###############################################################################
+#
+# unsigned int do_csum(const unsigned char *buff, size_t len)
+#
+###############################################################################
+	.globl	do_csum
+        .type	do_csum,@function
+do_csum:
+	movm	[d2,d3],(sp)
+	mov	d0,(12,sp)
+	mov	d1,(16,sp)
+	mov	d1,d2				# count
+	mov	d0,a0				# buff
+	clr	d1				# accumulator
+
+	cmp	+0,d2
+	beq	do_csum_done			# return if zero-length buffer
+
+	# 4-byte align the buffer pointer
+	btst	+3,a0
+	beq	do_csum_now_4b_aligned
+
+	btst	+1,a0
+	beq	do_csum_addr_not_odd
+	movbu	(a0),d0
+	inc	a0
+	asl	+8,d0
+	add	d0,d1
+	addc	+0,d1
+	add	-1,d2
+do_csum_addr_not_odd:
+
+	cmp	+2,d2
+	bcs	do_csum_fewer_than_4
+	btst	+2,a0
+	beq	do_csum_now_4b_aligned
+	movhu	(a0+),d0
+	add	d0,d1
+	addc	+0,d1
+	add	-2,d2
+	cmp	+4,d2
+	bcs	do_csum_fewer_than_4
+
+do_csum_now_4b_aligned:
+	# we want to checksum as much as we can in chunks of 32 bytes
+	cmp	+31,d2
+	bls	do_csum_remainder		# 4-byte aligned remainder
+
+	add	-32,d2
+	mov	+32,d3
+
+do_csum_loop:
+	mov	(a0+),d0
+	add	d0,d1
+	mov	(a0+),e0
+	addc	e0,d1
+	mov	(a0+),e1
+	addc	e1,d1
+	mov	(a0+),e3
+	addc	e3,d1
+	mov	(a0+),d0
+	addc	d0,d1
+	mov	(a0+),e0
+	addc	e0,d1
+	mov	(a0+),e1
+	addc	e1,d1
+	mov	(a0+),e3
+	addc	e3,d1
+	addc	+0,d1
+
+	sub	d3,d2
+	bcc	do_csum_loop
+
+	add	d3,d2
+	beq	do_csum_done
+
+do_csum_remainder:
+	# cut 16-31 bytes down to 0-15
+	cmp	+16,d2
+	bcs	do_csum_fewer_than_16
+	mov	(a0+),d0
+	add	d0,d1
+	mov	(a0+),e0
+	addc	e0,d1
+	mov	(a0+),e1
+	addc	e1,d1
+	mov	(a0+),e3
+	addc	e3,d1
+	addc	+0,d1
+	add	-16,d2
+	beq	do_csum_done
+
+do_csum_fewer_than_16:
+	# copy the remaining whole words
+	cmp	+4,d2
+	bcs	do_csum_fewer_than_4
+	cmp	+8,d2
+	bcs	do_csum_one_word
+	cmp	+12,d2
+	bcs	do_csum_two_words
+	mov	(a0+),d0
+	add	d0,d1
+	addc	+0,d1
+do_csum_two_words:
+	mov	(a0+),d0
+	add	d0,d1
+	addc	+0,d1
+do_csum_one_word:
+	mov	(a0+),d0
+	add	d0,d1
+	addc	+0,d1
+
+do_csum_fewer_than_4:
+	and	+3,d2
+	beq	do_csum_done
+	xor_cmp	d0,d0,+2,d2
+	bcs	do_csum_fewer_than_2
+	movhu	(a0+),d0
+do_csum_fewer_than_2:
+	and	+1,d2
+	beq	do_csum_add_last_bit
+	movbu	(a0),d3
+	add	d3,d0
+do_csum_add_last_bit:
+	add	d0,d1
+	addc	+0,d1
+
+do_csum_done:
+	# compress the checksum down to 16 bits
+	mov	+0xffff0000,d2
+	and	d1,d2
+	asl	+16,d1
+	add	d2,d1,d0
+	addc	+0xffff,d0
+	lsr	+16,d0
+
+	# flip the halves of the word result if the buffer was oddly aligned
+	mov	(12,sp),d1
+	and	+1,d1
+	beq	do_csum_not_oddly_aligned
+	swaph	d0,d0				# exchange bits 15:8 with 7:0
+
+do_csum_not_oddly_aligned:
+	ret	[d2,d3],8
+
+do_csum_end:
+	.size	do_csum, do_csum_end-do_csum
diff --git a/arch/mn10300/lib/internal.h b/arch/mn10300/lib/internal.h
new file mode 100644
index 000000000000..0014eee5f04f
--- /dev/null
+++ b/arch/mn10300/lib/internal.h
@@ -0,0 +1,15 @@
+/* Internal definitions for the arch part of the kernel library
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+/*
+ * do_csum.S
+ */
+extern unsigned int do_csum(const unsigned char *, size_t);
diff --git a/arch/mn10300/lib/lshrdi3.c b/arch/mn10300/lib/lshrdi3.c
new file mode 100644
index 000000000000..e05e64e9ce96
--- /dev/null
+++ b/arch/mn10300/lib/lshrdi3.c
@@ -0,0 +1,60 @@
+/* lshrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
+/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public Licence as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public Licence for more details.
+
+You should have received a copy of the GNU General Public Licence
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#define BITS_PER_UNIT 8
+
+typedef 	 int SItype	__attribute__((mode(SI)));
+typedef unsigned int USItype	__attribute__((mode(SI)));
+typedef		 int DItype	__attribute__((mode(DI)));
+typedef		 int word_type	__attribute__((mode(__word__)));
+
+struct DIstruct {
+	SItype	low;
+	SItype	high;
+};
+
+union DIunion {
+	struct DIstruct	s;
+	DItype		ll;
+};
+
+DItype __lshrdi3(DItype u, word_type b)
+{
+	union DIunion w;
+	word_type bm;
+	union DIunion uu;
+
+	if (b == 0)
+		return u;
+
+	uu.ll = u;
+
+	bm = (sizeof(SItype) * BITS_PER_UNIT) - b;
+	if (bm <= 0) {
+		w.s.high = 0;
+		w.s.low = (USItype) uu.s.high >> -bm;
+	} else {
+		USItype carries = (USItype) uu.s.high << bm;
+		w.s.high = (USItype) uu.s.high >> b;
+		w.s.low = ((USItype) uu.s.low >> b) | carries;
+	}
+
+	return w.ll;
+}
diff --git a/arch/mn10300/lib/memcpy.S b/arch/mn10300/lib/memcpy.S
new file mode 100644
index 000000000000..25fb9bb2604f
--- /dev/null
+++ b/arch/mn10300/lib/memcpy.S
@@ -0,0 +1,135 @@
+/* MN10300 Optimised simple memory to memory copy
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/cache.h>
+
+        .section .text
+        .balign	L1_CACHE_BYTES
+
+###############################################################################
+#
+# void *memcpy(void *dst, const void *src, size_t n)
+#
+###############################################################################
+	.globl	memcpy
+        .type	memcpy,@function
+memcpy:
+	movm	[d2,d3],(sp)
+	mov	d0,(12,sp)
+	mov	d1,(16,sp)
+	mov	(20,sp),d2			# count
+	mov	d0,a0				# dst
+	mov	d1,a1				# src
+	mov	d0,e3				# the return value
+
+	cmp	+0,d2
+	beq	memcpy_done			# return if zero-length copy
+
+	# see if the three parameters are all four-byte aligned
+	or	d0,d1,d3
+	or	d2,d3
+	and	+3,d3
+	bne	memcpy_1			# jump if not
+
+	# we want to transfer as much as we can in chunks of 32 bytes
+	cmp	+31,d2
+	bls	memcpy_4_remainder		# 4-byte aligned remainder
+
+	movm	[exreg1],(sp)
+	add	-32,d2
+	mov	+32,d3
+
+memcpy_4_loop:
+	mov	(a1+),d0
+	mov	(a1+),d1
+	mov	(a1+),e0
+	mov	(a1+),e1
+	mov	(a1+),e4
+	mov	(a1+),e5
+	mov	(a1+),e6
+	mov	(a1+),e7
+	mov	d0,(a0+)
+	mov	d1,(a0+)
+	mov	e0,(a0+)
+	mov	e1,(a0+)
+	mov	e4,(a0+)
+	mov	e5,(a0+)
+	mov	e6,(a0+)
+	mov	e7,(a0+)
+
+	sub	d3,d2
+	bcc	memcpy_4_loop
+
+	movm	(sp),[exreg1]
+	add	d3,d2
+	beq	memcpy_4_no_remainder
+
+memcpy_4_remainder:
+	# cut 4-7 words down to 0-3
+	cmp	+16,d2
+	bcs	memcpy_4_three_or_fewer_words
+	mov	(a1+),d0
+	mov	(a1+),d1
+	mov	(a1+),e0
+	mov	(a1+),e1
+	mov	d0,(a0+)
+	mov	d1,(a0+)
+	mov	e0,(a0+)
+	mov	e1,(a0+)
+	add	-16,d2
+	beq	memcpy_4_no_remainder
+
+	# copy the remaining 1, 2 or 3 words
+memcpy_4_three_or_fewer_words:
+	cmp	+8,d2
+	bcs	memcpy_4_one_word
+	beq	memcpy_4_two_words
+	mov	(a1+),d0
+	mov	d0,(a0+)
+memcpy_4_two_words:
+	mov	(a1+),d0
+	mov	d0,(a0+)
+memcpy_4_one_word:
+	mov	(a1+),d0
+	mov	d0,(a0+)
+
+memcpy_4_no_remainder:
+	# check we copied the correct amount
+	# TODO: REMOVE CHECK
+	sub	e3,a0,d2
+	mov	(20,sp),d1
+	cmp	d2,d1
+	beq	memcpy_done
+	break
+	break
+	break
+
+memcpy_done:
+	mov	e3,a0
+	ret	[d2,d3],8
+
+	# handle misaligned copying
+memcpy_1:
+	add	-1,d2
+	mov	+1,d3
+	setlb					# setlb requires the next insns
+						# to occupy exactly 4 bytes
+
+	sub	d3,d2
+	movbu	(a1),d0
+	movbu	d0,(a0)
+	add_add	d3,a1,d3,a0
+	lcc
+
+	mov	e3,a0
+	ret	[d2,d3],8
+
+memcpy_end:
+	.size	memcpy, memcpy_end-memcpy
diff --git a/arch/mn10300/lib/memmove.S b/arch/mn10300/lib/memmove.S
new file mode 100644
index 000000000000..20b07b62b77c
--- /dev/null
+++ b/arch/mn10300/lib/memmove.S
@@ -0,0 +1,160 @@
+/* MN10300 Optimised simple memory to memory copy, with support for overlapping
+ * regions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/cache.h>
+
+        .section .text
+        .balign	L1_CACHE_BYTES
+
+###############################################################################
+#
+# void *memmove(void *dst, const void *src, size_t n)
+#
+###############################################################################
+	.globl	memmove
+        .type	memmove,@function
+memmove:
+	# fall back to memcpy if dst < src to work bottom up
+	cmp	d1,d0
+	bcs	memmove_memcpy
+
+	# work top down
+	movm	[d2,d3],(sp)
+	mov	d0,(12,sp)
+	mov	d1,(16,sp)
+	mov	(20,sp),d2			# count
+	add	d0,d2,a0			# dst end
+	add	d1,d2,a1			# src end
+	mov	d0,e3				# the return value
+
+	cmp	+0,d2
+	beq	memmove_done			# return if zero-length copy
+
+	# see if the three parameters are all four-byte aligned
+	or	d0,d1,d3
+	or	d2,d3
+	and	+3,d3
+	bne	memmove_1			# jump if not
+
+	# we want to transfer as much as we can in chunks of 32 bytes
+	add	-4,a1
+	cmp	+31,d2
+	bls	memmove_4_remainder		# 4-byte aligned remainder
+
+	add	-32,d2
+	mov	+32,d3
+
+memmove_4_loop:
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+	mov	(a1),d1
+	sub_sub	+4,a1,+4,a0
+	mov	d1,(a0)
+
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+	mov	(a1),d1
+	sub_sub	+4,a1,+4,a0
+	mov	d1,(a0)
+
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+	mov	(a1),d1
+	sub_sub	+4,a1,+4,a0
+	mov	d1,(a0)
+
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+	mov	(a1),d1
+	sub_sub	+4,a1,+4,a0
+	mov	d1,(a0)
+
+	sub	d3,d2
+	bcc	memmove_4_loop
+
+	add	d3,d2
+	beq	memmove_4_no_remainder
+
+memmove_4_remainder:
+	# cut 4-7 words down to 0-3
+	cmp	+16,d2
+	bcs	memmove_4_three_or_fewer_words
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+	mov	(a1),d1
+	sub_sub	+4,a1,+4,a0
+	mov	d1,(a0)
+	mov	(a1),e0
+	sub_sub	+4,a1,+4,a0
+	mov	e0,(a0)
+	mov	(a1),e1
+	sub_sub	+4,a1,+4,a0
+	mov	e1,(a0)
+	add	-16,d2
+	beq	memmove_4_no_remainder
+
+	# copy the remaining 1, 2 or 3 words
+memmove_4_three_or_fewer_words:
+	cmp	+8,d2
+	bcs	memmove_4_one_word
+	beq	memmove_4_two_words
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+memmove_4_two_words:
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+memmove_4_one_word:
+	mov	(a1),d0
+	sub_sub	+4,a1,+4,a0
+	mov	d0,(a0)
+
+memmove_4_no_remainder:
+	# check we copied the correct amount
+	# TODO: REMOVE CHECK
+	sub	e3,a0,d2
+	beq	memmove_done
+	break
+	break
+	break
+
+memmove_done:
+	mov	e3,a0
+	ret	[d2,d3],8
+
+	# handle misaligned copying
+memmove_1:
+	add	-1,a1
+	add	-1,d2
+	mov	+1,d3
+	setlb					# setlb requires the next insns
+						# to occupy exactly 4 bytes
+
+	sub	d3,d2
+	movbu	(a1),d0
+	sub_sub	d3,a1,d3,a0
+	movbu	d0,(a0)
+	lcc
+
+	mov	e3,a0
+	ret	[d2,d3],8
+
+memmove_memcpy:
+	jmp	memcpy
+
+memmove_end:
+	.size	memmove, memmove_end-memmove
diff --git a/arch/mn10300/lib/memset.S b/arch/mn10300/lib/memset.S
new file mode 100644
index 000000000000..bc02e39629b7
--- /dev/null
+++ b/arch/mn10300/lib/memset.S
@@ -0,0 +1,121 @@
+/* Optimised simple memory fill
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/cache.h>
+
+        .section .text
+        .balign	L1_CACHE_BYTES
+
+###############################################################################
+#
+# void *memset(void *dst, int c, size_t n)
+#
+###############################################################################
+	.globl	memset
+        .type	memset,@function
+memset:
+	movm	[d2,d3],(sp)
+	mov	d0,(12,sp)
+	mov	d1,(16,sp)
+	mov	(20,sp),d2			# count
+	mov	d0,a0				# dst
+	mov	d0,e3				# the return value
+
+	cmp	+0,d2
+	beq	memset_done			# return if zero-length fill
+
+	# see if the region parameters are four-byte aligned
+	or	d0,d2,d3
+	and	+3,d3
+	bne	memset_1			# jump if not
+
+	extbu	d1
+	mov_asl	d1,d3,8,d1
+	or_asl	d1,d3,8,d1
+	or_asl	d1,d3,8,d1
+	or	d3,d1
+
+	# we want to transfer as much as we can in chunks of 32 bytes
+	cmp	+31,d2
+	bls	memset_4_remainder		# 4-byte aligned remainder
+
+	add	-32,d2
+	mov	+32,d3
+
+memset_4_loop:
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+
+	sub	d3,d2
+	bcc	memset_4_loop
+
+	add	d3,d2
+	beq	memset_4_no_remainder
+
+memset_4_remainder:
+	# cut 4-7 words down to 0-3
+	cmp	+16,d2
+	bcs	memset_4_three_or_fewer_words
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	mov	d1,(a0+)
+	add	-16,d2
+	beq	memset_4_no_remainder
+
+	# copy the remaining 1, 2 or 3 words
+memset_4_three_or_fewer_words:
+	cmp	+8,d2
+	bcs	memset_4_one_word
+	beq	memset_4_two_words
+	mov	d1,(a0+)
+memset_4_two_words:
+	mov	d1,(a0+)
+memset_4_one_word:
+	mov	d1,(a0+)
+
+memset_4_no_remainder:
+	# check we set the correct amount
+	# TODO: REMOVE CHECK
+	sub	e3,a0,d2
+	mov	(20,sp),d1
+	cmp	d2,d1
+	beq	memset_done
+	break
+	break
+	break
+
+memset_done:
+	mov	e3,a0
+	ret	[d2,d3],8
+
+	# handle misaligned copying
+memset_1:
+	add	-1,d2
+	mov	+1,d3
+	setlb					# setlb requires the next insns
+						# to occupy exactly 4 bytes
+
+	sub	d3,d2
+	movbu	d1,(a0)
+	inc	a0
+	lcc
+
+	mov	e3,a0
+	ret	[d2,d3],8
+
+memset_end:
+	.size	memset, memset_end-memset
diff --git a/arch/mn10300/lib/negdi2.c b/arch/mn10300/lib/negdi2.c
new file mode 100644
index 000000000000..eae4ecdd5f69
--- /dev/null
+++ b/arch/mn10300/lib/negdi2.c
@@ -0,0 +1,57 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+   2000, 2001  Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public Licence as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+In addition to the permissions in the GNU General Public Licence, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public Licence restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public Licence for more details.
+
+You should have received a copy of the GNU General Public Licence
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* It is incorrect to include config.h here, because this file is being
+   compiled for the target, and hence definitions concerning only the host
+   do not apply.  */
+
+#include <linux/types.h>
+
+union DWunion {
+	s64 ll;
+	struct {
+		s32 low;
+		s32 high;
+	} s;
+};
+
+s64 __negdi2(s64 u)
+{
+	union DWunion w;
+	union DWunion uu;
+
+	uu.ll = u;
+
+	w.s.low = -uu.s.low;
+	w.s.high = -uu.s.high - ((u32) w.s.low > 0);
+
+	return w.ll;
+}
diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c
new file mode 100644
index 000000000000..a75b203059c1
--- /dev/null
+++ b/arch/mn10300/lib/usercopy.c
@@ -0,0 +1,166 @@
+/* MN10300 Userspace accessor functions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <asm/uaccess.h>
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		__copy_user(to, from, n);
+	return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		__copy_user_zeroing(to, from, n);
+	return n;
+}
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+#define __do_strncpy_from_user(dst, src, count, res)		\
+do {								\
+	int w;							\
+	asm volatile(						\
+		"	mov	%1,%0\n"			\
+		"	cmp	0,%1\n"				\
+		"	beq	2f\n"				\
+		"0:\n"						\
+		"	movbu	(%5),%2\n"			\
+		"1:\n"						\
+		"	movbu	%2,(%6)\n"			\
+		"	inc	%5\n"				\
+		"	inc	%6\n"				\
+		"	cmp	0,%2\n"				\
+		"	beq	2f\n"				\
+		"	add	-1,%1\n"			\
+		"	bne	0b\n"				\
+		"2:\n"						\
+		"	sub	%1,%0\n"			\
+		"3:\n"						\
+		"	.section .fixup,\"ax\"\n"		\
+		"4:\n"						\
+		"	mov	%3,%0\n"			\
+		"	jmp	3b\n"				\
+		"	.previous\n"				\
+		"	.section __ex_table,\"a\"\n"		\
+		"	.balign 4\n"				\
+		"	.long 0b,4b\n"				\
+		"	.long 1b,4b\n"				\
+		"	.previous"				\
+		:"=&r"(res), "=r"(count), "=&r"(w)		\
+		:"i"(-EFAULT), "1"(count), "a"(src), "a"(dst)	\
+		:"memory");					\
+} while (0)
+
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+	long res;
+	__do_strncpy_from_user(dst, src, count, res);
+	return res;
+}
+
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+	long res = -EFAULT;
+	if (access_ok(VERIFY_READ, src, 1))
+		__do_strncpy_from_user(dst, src, count, res);
+	return res;
+}
+
+
+/*
+ * Clear a userspace memory
+ */
+#define __do_clear_user(addr, size)		\
+do {						\
+	int w;					\
+	asm volatile(				\
+		"	cmp 0,%0\n"		\
+		"	beq 1f\n"		\
+		"	clr %1\n"		\
+		"0:	movbu %1,(%3,%2)\n"	\
+		"	inc %3\n"		\
+		"	cmp %0,%3\n"		\
+		"	bne 0b\n"		\
+		"1:\n"				\
+		"	sub %3,%0\n"		\
+		"2:\n"				\
+		".section .fixup,\"ax\"\n"	\
+		"3:	jmp 2b\n"		\
+		".previous\n"			\
+		".section __ex_table,\"a\"\n"	\
+		"       .balign 4\n"		\
+		"       .long 0b,3b\n"		\
+		".previous\n"			\
+		: "+r"(size), "=&r"(w)		\
+		: "a"(addr), "d"(0)		\
+		: "memory");			\
+} while (0)
+
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+	__do_clear_user(to, n);
+	return n;
+}
+
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		__do_clear_user(to, n);
+	return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+long strnlen_user(const char *s, long n)
+{
+	unsigned long res, w;
+
+	if (!__addr_ok(s))
+		return 0;
+
+	if (n < 0 || n + (u_long) s > current_thread_info()->addr_limit.seg)
+		n = current_thread_info()->addr_limit.seg - (u_long)s;
+
+	asm volatile(
+		"0:	cmp %4,%0\n"
+		"	beq 2f\n"
+		"1:	movbu (%0,%3),%1\n"
+		"	inc %0\n"
+		"	cmp 0,%1\n"
+		"	beq 3f\n"
+		"	bra 0b\n"
+		"2:	clr %0\n"
+		"3:\n"
+		".section .fixup,\"ax\"\n"
+		"4:	jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.balign 4\n"
+		"	.long 1b,4b\n"
+		".previous\n"
+		:"=d"(res), "=&r"(w)
+		:"0"(0), "a"(s), "r"(n)
+		:"memory");
+	return res;
+}
diff --git a/arch/mn10300/mm/Makefile b/arch/mn10300/mm/Makefile
new file mode 100644
index 000000000000..28b9d983db0c
--- /dev/null
+++ b/arch/mn10300/mm/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the MN10300-specific memory management code
+#
+
+obj-y := \
+	init.o fault.o pgtable.o extable.o tlb-mn10300.o mmu-context.o \
+	misalignment.o dma-alloc.o
+
+ifneq ($(CONFIG_MN10300_CACHE_DISABLED),y)
+obj-y	+= cache.o cache-mn10300.o
+ifeq ($(CONFIG_MN10300_CACHE_WBACK),y)
+obj-y	+= cache-flush-mn10300.o
+endif
+endif
diff --git a/arch/mn10300/mm/cache-flush-mn10300.S b/arch/mn10300/mm/cache-flush-mn10300.S
new file mode 100644
index 000000000000..c8ed1cbac107
--- /dev/null
+++ b/arch/mn10300/mm/cache-flush-mn10300.S
@@ -0,0 +1,192 @@
+/* MN10300 CPU core caching routines
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+
+	.am33_2
+	.globl mn10300_dcache_flush
+	.globl mn10300_dcache_flush_page
+	.globl mn10300_dcache_flush_range
+	.globl mn10300_dcache_flush_range2
+	.globl mn10300_dcache_flush_inv
+	.globl mn10300_dcache_flush_inv_page
+	.globl mn10300_dcache_flush_inv_range
+	.globl mn10300_dcache_flush_inv_range2
+
+###############################################################################
+#
+# void mn10300_dcache_flush(void)
+# Flush the entire data cache back to RAM
+#
+###############################################################################
+	ALIGN
+mn10300_dcache_flush:
+	movhu	(CHCTR),d0
+	btst	CHCTR_DCEN,d0
+	beq	mn10300_dcache_flush_end
+
+	# read the addresses tagged in the cache's tag RAM and attempt to flush
+	# those addresses specifically
+	# - we rely on the hardware to filter out invalid tag entry addresses
+	mov	DCACHE_TAG(0,0),a0		# dcache tag RAM access address
+	mov	DCACHE_PURGE(0,0),a1		# dcache purge request address
+	mov	L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1  # total number of entries
+
+mn10300_dcache_flush_loop:
+	mov	(a0),d0
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
+	or	L1_CACHE_TAG_VALID,d0		# retain valid entries in the
+						# cache
+	mov	d0,(a1)				# conditional purge
+
+mn10300_dcache_flush_skip:
+	add	L1_CACHE_BYTES,a0
+	add	L1_CACHE_BYTES,a1
+	add	-1,d1
+	bne	mn10300_dcache_flush_loop
+
+mn10300_dcache_flush_end:
+	ret	[],0
+
+###############################################################################
+#
+# void mn10300_dcache_flush_page(unsigned start)
+# void mn10300_dcache_flush_range(unsigned start, unsigned end)
+# void mn10300_dcache_flush_range2(unsigned start, unsigned size)
+# Flush a range of addresses on a page in the dcache
+#
+###############################################################################
+	ALIGN
+mn10300_dcache_flush_page:
+	mov	PAGE_SIZE,d1
+mn10300_dcache_flush_range2:
+	add	d0,d1
+mn10300_dcache_flush_range:
+	movm	[d2,d3],(sp)
+
+	movhu	(CHCTR),d2
+	btst	CHCTR_DCEN,d2
+	beq	mn10300_dcache_flush_range_end
+
+	# round start addr down
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0
+	mov	d0,a1
+
+	add	L1_CACHE_BYTES,d1			# round end addr up
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+
+	# write a request to flush all instances of an address from the cache
+	mov	DCACHE_PURGE(0,0),a0
+	mov	a1,d0
+	and	L1_CACHE_TAG_ENTRY,d0
+	add	d0,a0				# starting dcache purge control
+						# reg address
+
+	sub	a1,d1
+	lsr	L1_CACHE_SHIFT,d1		# total number of entries to
+						# examine
+
+	or	L1_CACHE_TAG_VALID,a1		# retain valid entries in the
+						# cache
+
+mn10300_dcache_flush_range_loop:
+	mov	a1,(L1_CACHE_WAYDISP*0,a0)	# conditionally purge this line
+						# all ways
+
+	add	L1_CACHE_BYTES,a0
+	add	L1_CACHE_BYTES,a1
+	and	~L1_CACHE_WAYDISP,a0		# make sure way stay on way 0
+	add	-1,d1
+	bne	mn10300_dcache_flush_range_loop
+
+mn10300_dcache_flush_range_end:
+	ret	[d2,d3],8
+
+###############################################################################
+#
+# void mn10300_dcache_flush_inv(void)
+# Flush the entire data cache and invalidate all entries
+#
+###############################################################################
+	ALIGN
+mn10300_dcache_flush_inv:
+	movhu	(CHCTR),d0
+	btst	CHCTR_DCEN,d0
+	beq	mn10300_dcache_flush_inv_end
+
+	# hit each line in the dcache with an unconditional purge
+	mov	DCACHE_PURGE(0,0),a1		# dcache purge request address
+	mov	L1_CACHE_NWAYS*L1_CACHE_NENTRIES,d1  # total number of entries
+
+mn10300_dcache_flush_inv_loop:
+	mov	(a1),d0				# unconditional purge
+
+	add	L1_CACHE_BYTES,a1
+	add	-1,d1
+	bne	mn10300_dcache_flush_inv_loop
+
+mn10300_dcache_flush_inv_end:
+	ret	[],0
+
+###############################################################################
+#
+# void mn10300_dcache_flush_inv_page(unsigned start)
+# void mn10300_dcache_flush_inv_range(unsigned start, unsigned end)
+# void mn10300_dcache_flush_inv_range2(unsigned start, unsigned size)
+# Flush and invalidate a range of addresses on a page in the dcache
+#
+###############################################################################
+	ALIGN
+mn10300_dcache_flush_inv_page:
+	mov	PAGE_SIZE,d1
+mn10300_dcache_flush_inv_range2:
+	add	d0,d1
+mn10300_dcache_flush_inv_range:
+	movm	[d2,d3],(sp)
+	movhu	(CHCTR),d2
+	btst	CHCTR_DCEN,d2
+	beq	mn10300_dcache_flush_inv_range_end
+
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0	# round start
+								# addr down
+	mov	d0,a1
+
+	add	L1_CACHE_BYTES,d1			# round end addr up
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+
+	# write a request to flush and invalidate all instances of an address
+	# from the cache
+	mov	DCACHE_PURGE(0,0),a0
+	mov	a1,d0
+	and	L1_CACHE_TAG_ENTRY,d0
+	add	d0,a0				# starting dcache purge control
+						# reg address
+
+	sub	a1,d1
+	lsr	L1_CACHE_SHIFT,d1		# total number of entries to
+						# examine
+
+mn10300_dcache_flush_inv_range_loop:
+	mov	a1,(L1_CACHE_WAYDISP*0,a0)	# conditionally purge this line
+						# in all ways
+
+	add	L1_CACHE_BYTES,a0
+	add	L1_CACHE_BYTES,a1
+	and	~L1_CACHE_WAYDISP,a0		# make sure way stay on way 0
+	add	-1,d1
+	bne	mn10300_dcache_flush_inv_range_loop
+
+mn10300_dcache_flush_inv_range_end:
+	ret	[d2,d3],8
diff --git a/arch/mn10300/mm/cache-mn10300.S b/arch/mn10300/mm/cache-mn10300.S
new file mode 100644
index 000000000000..e839d0aedd69
--- /dev/null
+++ b/arch/mn10300/mm/cache-mn10300.S
@@ -0,0 +1,289 @@
+/* MN10300 CPU core caching routines
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#define mn10300_dcache_inv_range_intr_interval \
+	+((1 << MN10300_DCACHE_INV_RANGE_INTR_LOG2_INTERVAL) - 1)
+
+#if mn10300_dcache_inv_range_intr_interval > 0xff
+#error MN10300_DCACHE_INV_RANGE_INTR_LOG2_INTERVAL must be 8 or less
+#endif
+
+	.am33_2
+
+	.globl mn10300_icache_inv
+	.globl mn10300_dcache_inv
+	.globl mn10300_dcache_inv_range
+	.globl mn10300_dcache_inv_range2
+	.globl mn10300_dcache_inv_page
+
+###############################################################################
+#
+# void mn10300_icache_inv(void)
+# Invalidate the entire icache
+#
+###############################################################################
+	ALIGN
+mn10300_icache_inv:
+	mov	CHCTR,a0
+
+	movhu	(a0),d0
+	btst	CHCTR_ICEN,d0
+	beq	mn10300_icache_inv_end
+
+	mov	epsw,d1
+	and	~EPSW_IE,epsw
+	nop
+	nop
+
+	# disable the icache
+	and	~CHCTR_ICEN,d0
+	movhu	d0,(a0)
+
+	# and wait for it to calm down
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# invalidate
+	or	CHCTR_ICINV,d0
+	movhu	d0,(a0)
+
+	# wait for the cache to finish
+	mov	CHCTR,a0
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_ICBUSY,d0
+	lne
+
+	# and reenable it
+	and	~CHCTR_ICINV,d0
+	or	CHCTR_ICEN,d0
+	movhu	d0,(a0)
+	movhu	(a0),d0
+
+	mov	d1,epsw
+
+mn10300_icache_inv_end:
+	ret	[],0
+
+###############################################################################
+#
+# void mn10300_dcache_inv(void)
+# Invalidate the entire dcache
+#
+###############################################################################
+	ALIGN
+mn10300_dcache_inv:
+	mov	CHCTR,a0
+
+	movhu	(a0),d0
+	btst	CHCTR_DCEN,d0
+	beq	mn10300_dcache_inv_end
+
+	mov	epsw,d1
+	and	~EPSW_IE,epsw
+	nop
+	nop
+
+	# disable the dcache
+	and	~CHCTR_DCEN,d0
+	movhu	d0,(a0)
+
+	# and wait for it to calm down
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_DCBUSY,d0
+	lne
+
+	# invalidate
+	or	CHCTR_DCINV,d0
+	movhu	d0,(a0)
+
+	# wait for the cache to finish
+	mov	CHCTR,a0
+	setlb
+	movhu	(a0),d0
+	btst	CHCTR_DCBUSY,d0
+	lne
+
+	# and reenable it
+	and	~CHCTR_DCINV,d0
+	or	CHCTR_DCEN,d0
+	movhu	d0,(a0)
+	movhu	(a0),d0
+
+	mov	d1,epsw
+
+mn10300_dcache_inv_end:
+	ret	[],0
+
+###############################################################################
+#
+# void mn10300_dcache_inv_range(unsigned start, unsigned end)
+# void mn10300_dcache_inv_range2(unsigned start, unsigned size)
+# void mn10300_dcache_inv_page(unsigned start)
+# Invalidate a range of addresses on a page in the dcache
+#
+###############################################################################
+	ALIGN
+mn10300_dcache_inv_page:
+	mov	PAGE_SIZE,d1
+mn10300_dcache_inv_range2:
+	add	d0,d1
+mn10300_dcache_inv_range:
+	movm	[d2,d3,a2],(sp)
+	mov	CHCTR,a2
+
+	movhu	(a2),d2
+	btst	CHCTR_DCEN,d2
+	beq	mn10300_dcache_inv_range_end
+
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d0	# round start
+								# addr down
+	mov	d0,a1
+
+	add	L1_CACHE_BYTES,d1			# round end addr up
+	and	L1_CACHE_TAG_ADDRESS|L1_CACHE_TAG_ENTRY,d1
+
+	clr	d2				# we're going to clear tag ram
+						# entries
+
+	# read the tags from the tag RAM, and if they indicate a valid dirty
+	# cache line then invalidate that line
+	mov	DCACHE_TAG(0,0),a0
+	mov	a1,d0
+	and	L1_CACHE_TAG_ENTRY,d0
+	add	d0,a0				# starting dcache tag RAM
+						# access address
+
+	sub	a1,d1
+	lsr	L1_CACHE_SHIFT,d1		# total number of entries to
+						# examine
+
+	and	~(L1_CACHE_DISPARITY-1),a1	# determine comparator base
+
+mn10300_dcache_inv_range_outer_loop:
+	# disable interrupts
+	mov	epsw,d3
+	and	~EPSW_IE,epsw
+	nop					# note that reading CHCTR and
+						# AND'ing D0 occupy two delay
+						# slots after disabling
+						# interrupts
+
+	# disable the dcache
+	movhu	(a2),d0
+	and	~CHCTR_DCEN,d0
+	movhu	d0,(a2)
+
+	# and wait for it to calm down
+	setlb
+	movhu	(a2),d0
+	btst	CHCTR_DCBUSY,d0
+	lne
+
+mn10300_dcache_inv_range_loop:
+
+	# process the way 0 slot
+	mov	(L1_CACHE_WAYDISP*0,a0),d0	# read the tag in the way 0 slot
+	btst	L1_CACHE_TAG_VALID,d0
+	beq	mn10300_dcache_inv_range_skip_0	# jump if this cacheline is not
+						# valid
+
+	xor	a1,d0
+	lsr	12,d0
+	bne	mn10300_dcache_inv_range_skip_0	# jump if not this cacheline
+
+	mov	d2,(a0)				# kill the tag
+
+mn10300_dcache_inv_range_skip_0:
+
+	# process the way 1 slot
+	mov	(L1_CACHE_WAYDISP*1,a0),d0	# read the tag in the way 1 slot
+	btst	L1_CACHE_TAG_VALID,d0
+	beq	mn10300_dcache_inv_range_skip_1	# jump if this cacheline is not
+						# valid
+
+	xor	a1,d0
+	lsr	12,d0
+	bne	mn10300_dcache_inv_range_skip_1	# jump if not this cacheline
+
+	mov	d2,(a0)				# kill the tag
+
+mn10300_dcache_inv_range_skip_1:
+
+	# process the way 2 slot
+	mov	(L1_CACHE_WAYDISP*2,a0),d0	# read the tag in the way 2 slot
+	btst	L1_CACHE_TAG_VALID,d0
+	beq	mn10300_dcache_inv_range_skip_2	# jump if this cacheline is not
+						# valid
+
+	xor	a1,d0
+	lsr	12,d0
+	bne	mn10300_dcache_inv_range_skip_2	# jump if not this cacheline
+
+	mov	d2,(a0)				# kill the tag
+
+mn10300_dcache_inv_range_skip_2:
+
+	# process the way 3 slot
+	mov	(L1_CACHE_WAYDISP*3,a0),d0	# read the tag in the way 3 slot
+	btst	L1_CACHE_TAG_VALID,d0
+	beq	mn10300_dcache_inv_range_skip_3	# jump if this cacheline is not
+						# valid
+
+	xor	a1,d0
+	lsr	12,d0
+	bne	mn10300_dcache_inv_range_skip_3	# jump if not this cacheline
+
+	mov	d2,(a0)				# kill the tag
+
+mn10300_dcache_inv_range_skip_3:
+
+	# approx every N steps we re-enable the cache and see if there are any
+	# interrupts to be processed
+	# we also break out if we've reached the end of the loop
+	# (the bottom nibble of the count is zero in both cases)
+	add	L1_CACHE_BYTES,a0
+	add	L1_CACHE_BYTES,a1
+	add	-1,d1
+	btst	mn10300_dcache_inv_range_intr_interval,d1
+	bne	mn10300_dcache_inv_range_loop
+
+	# wait for the cache to finish what it's doing
+	setlb
+	movhu	(a2),d0
+	btst	CHCTR_DCBUSY,d0
+	lne
+
+	# and reenable it
+	or	CHCTR_DCEN,d0
+	movhu	d0,(a2)
+	movhu	(a2),d0
+
+	# re-enable interrupts
+	# - we don't bother with delay NOPs as we'll have enough instructions
+	#   before we disable interrupts again to give the interrupts a chance
+	#   to happen
+	mov	d3,epsw
+
+	# go around again if the counter hasn't yet reached zero
+	add	0,d1
+	bne	mn10300_dcache_inv_range_outer_loop
+
+mn10300_dcache_inv_range_end:
+	ret	[d2,d3,a2],12
diff --git a/arch/mn10300/mm/cache.c b/arch/mn10300/mm/cache.c
new file mode 100644
index 000000000000..1b76719ec1c3
--- /dev/null
+++ b/arch/mn10300/mm/cache.c
@@ -0,0 +1,121 @@
+/* MN10300 Cache flushing routines
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/threads.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+EXPORT_SYMBOL(mn10300_icache_inv);
+EXPORT_SYMBOL(mn10300_dcache_inv);
+EXPORT_SYMBOL(mn10300_dcache_inv_range);
+EXPORT_SYMBOL(mn10300_dcache_inv_range2);
+EXPORT_SYMBOL(mn10300_dcache_inv_page);
+
+#ifdef CONFIG_MN10300_CACHE_WBACK
+EXPORT_SYMBOL(mn10300_dcache_flush);
+EXPORT_SYMBOL(mn10300_dcache_flush_inv);
+EXPORT_SYMBOL(mn10300_dcache_flush_inv_range);
+EXPORT_SYMBOL(mn10300_dcache_flush_inv_range2);
+EXPORT_SYMBOL(mn10300_dcache_flush_inv_page);
+EXPORT_SYMBOL(mn10300_dcache_flush_range);
+EXPORT_SYMBOL(mn10300_dcache_flush_range2);
+EXPORT_SYMBOL(mn10300_dcache_flush_page);
+#endif
+
+/*
+ * write a page back from the dcache and invalidate the icache so that we can
+ * run code from it that we've just written into it
+ */
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	mn10300_dcache_flush_page(page_to_phys(page));
+	mn10300_icache_inv();
+}
+EXPORT_SYMBOL(flush_icache_page);
+
+/*
+ * write some code we've just written back from the dcache and invalidate the
+ * icache so that we can run that code
+ */
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_MN10300_CACHE_WBACK
+	unsigned long addr, size, off;
+	struct page *page;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ppte, pte;
+
+	for (; start < end; start += size) {
+		/* work out how much of the page to flush */
+		off = start & (PAGE_SIZE - 1);
+
+		size = end - start;
+		if (size > PAGE_SIZE - off)
+			size = PAGE_SIZE - off;
+
+		/* get the physical address the page is mapped to from the page
+		 * tables */
+		pgd = pgd_offset(current->mm, start);
+		if (!pgd || !pgd_val(*pgd))
+			continue;
+
+		pud = pud_offset(pgd, start);
+		if (!pud || !pud_val(*pud))
+			continue;
+
+		pmd = pmd_offset(pud, start);
+		if (!pmd || !pmd_val(*pmd))
+			continue;
+
+		ppte = pte_offset_map(pmd, start);
+		if (!ppte)
+			continue;
+		pte = *ppte;
+		pte_unmap(ppte);
+
+		if (pte_none(pte))
+			continue;
+
+		page = pte_page(pte);
+		if (!page)
+			continue;
+
+		addr = page_to_phys(page);
+
+		/* flush the dcache and invalidate the icache coverage on that
+		 * region */
+		mn10300_dcache_flush_range2(addr + off, size);
+	}
+#endif
+
+	mn10300_icache_inv();
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+/*
+ * allow userspace to flush the instruction cache
+ */
+asmlinkage long sys_cacheflush(unsigned long start, unsigned long end)
+{
+	if (end < start)
+		return -EINVAL;
+
+	flush_icache_range(start, end);
+	return 0;
+}
diff --git a/arch/mn10300/mm/dma-alloc.c b/arch/mn10300/mm/dma-alloc.c
new file mode 100644
index 000000000000..f3649d8f50e3
--- /dev/null
+++ b/arch/mn10300/mm/dma-alloc.c
@@ -0,0 +1,56 @@
+/* MN10300 Dynamic DMA mapping support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Derived from: arch/i386/kernel/pci-dma.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *dma_handle, int gfp)
+{
+	unsigned long addr;
+	void *ret;
+
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+	if (dev == NULL || dev->coherent_dma_mask < 0xffffffff)
+		gfp |= GFP_DMA;
+
+	addr = __get_free_pages(gfp, get_order(size));
+	if (!addr)
+		return NULL;
+
+	/* map the coherent memory through the uncached memory window */
+	ret = (void *) (addr | 0x20000000);
+
+	/* fill the memory with obvious rubbish */
+	memset((void *) addr, 0xfb, size);
+
+	/* write back and evict all cache lines covering this region */
+	mn10300_dcache_flush_inv_range2(virt_to_phys((void *) addr), PAGE_SIZE);
+
+	*dma_handle = virt_to_bus((void *) addr);
+	return ret;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+		       dma_addr_t dma_handle)
+{
+	unsigned long addr = (unsigned long) vaddr & ~0x20000000;
+
+	free_pages(addr, get_order(size));
+}
+EXPORT_SYMBOL(dma_free_coherent);
diff --git a/arch/mn10300/mm/extable.c b/arch/mn10300/mm/extable.c
new file mode 100644
index 000000000000..25e5485ab87d
--- /dev/null
+++ b/arch/mn10300/mm/extable.c
@@ -0,0 +1,26 @@
+/* MN10300 In-kernel exception handling
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
new file mode 100644
index 000000000000..78f092ca0316
--- /dev/null
+++ b/arch/mn10300/mm/fault.c
@@ -0,0 +1,405 @@
+/* MN10300 MMU Fault handler
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/vt_kern.h>		/* For unblank_screen() */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+#include <asm/gdb-stub.h>
+#include <asm/cpu-regs.h>
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out
+ */
+void bust_spinlocks(int yes)
+{
+	if (yes) {
+		oops_in_progress = 1;
+#ifdef CONFIG_SMP
+		/* Many serial drivers do __global_cli() */
+		global_irq_lock = 0;
+#endif
+	} else {
+		int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+		unblank_screen();
+#endif
+		oops_in_progress = 0;
+		/*
+		 * OK, the message is on the console.  Now we call printk()
+		 * without oops_in_progress set so that printk will give klogd
+		 * a poke.  Hold onto your hats...
+		 */
+		console_loglevel = 15;	/* NMI oopser may have shut the console
+					 * up */
+		printk(" ");
+		console_loglevel = loglevel_save;
+	}
+}
+
+void do_BUG(const char *file, int line)
+{
+	bust_spinlocks(1);
+	printk(KERN_EMERG "------------[ cut here ]------------\n");
+	printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
+}
+
+#if 0
+static void print_pagetable_entries(pgd_t *pgdir, unsigned long address)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgdir + __pgd_offset(address);
+	printk(KERN_DEBUG "pgd entry %p: %016Lx\n",
+	       pgd, (long long) pgd_val(*pgd));
+
+	if (!pgd_present(*pgd)) {
+		printk(KERN_DEBUG "... pgd not present!\n");
+		return;
+	}
+	pmd = pmd_offset(pgd, address);
+	printk(KERN_DEBUG "pmd entry %p: %016Lx\n",
+	       pmd, (long long)pmd_val(*pmd));
+
+	if (!pmd_present(*pmd)) {
+		printk(KERN_DEBUG "... pmd not present!\n");
+		return;
+	}
+	pte = pte_offset(pmd, address);
+	printk(KERN_DEBUG "pte entry %p: %016Lx\n",
+	       pte, (long long) pte_val(*pte));
+
+	if (!pte_present(*pte))
+		printk(KERN_DEBUG "... pte not present!\n");
+}
+#endif
+
+asmlinkage void monitor_signal(struct pt_regs *);
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * fault_code:
+ * - LSW: either MMUFCR_IFC or MMUFCR_DFC as appropriate
+ * - MSW: 0 if data access, 1 if instruction access
+ * - bit 0: TLB miss flag
+ * - bit 1: initial write
+ * - bit 2: page invalid
+ * - bit 3: protection violation
+ * - bit 4: accessor (0=user 1=kernel)
+ * - bit 5: 0=read 1=write
+ * - bit 6-8: page protection spec
+ * - bit 9: illegal address
+ * - bit 16: 0=data 1=ins
+ *
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
+			      unsigned long address)
+{
+	struct vm_area_struct *vma;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	unsigned long page;
+	siginfo_t info;
+	int write, fault;
+
+#ifdef CONFIG_GDBSTUB
+	/* handle GDB stub causing a fault */
+	if (gdbstub_busy) {
+		gdbstub_exception(regs, TBR & TBR_INT_CODE);
+		return;
+	}
+#endif
+
+#if 0
+	printk(KERN_DEBUG "--- do_page_fault(%p,%s:%04lx,%08lx)\n",
+	       regs,
+	       fault_code & 0x10000 ? "ins" : "data",
+	       fault_code & 0xffff, address);
+#endif
+
+	tsk = current;
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * This verifies that the fault happens in kernel space
+	 * and that the fault was a page not present (invalid) error
+	 */
+	if (address >= VMALLOC_START && address < VMALLOC_END &&
+	    (fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_SR &&
+	    (fault_code & MMUFCR_xFC_PGINVAL) == MMUFCR_xFC_PGINVAL
+	    )
+		goto vmalloc_fault;
+
+	mm = tsk->mm;
+	info.si_code = SEGV_MAPERR;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_interrupt() || !mm)
+		goto no_context;
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) {
+		/* accessing the stack below the stack pointer is always a
+		 * bug */
+		if ((address & PAGE_MASK) + 2 * PAGE_SIZE < regs->sp) {
+#if 0
+			printk(KERN_WARNING
+			       "[%d] ### Access below stack @%lx (sp=%lx)\n",
+			       current->pid, address, regs->sp);
+			printk(KERN_WARNING
+			       "vma [%08x - %08x]\n",
+			       vma->vm_start, vma->vm_end);
+			show_registers(regs);
+			printk(KERN_WARNING
+			       "[%d] ### Code: [%08lx]"
+			       " %02x %02x %02x %02x %02x %02x %02x %02x\n",
+			       current->pid,
+			       regs->pc,
+			       ((u8 *) regs->pc)[0],
+			       ((u8 *) regs->pc)[1],
+			       ((u8 *) regs->pc)[2],
+			       ((u8 *) regs->pc)[3],
+			       ((u8 *) regs->pc)[4],
+			       ((u8 *) regs->pc)[5],
+			       ((u8 *) regs->pc)[6],
+			       ((u8 *) regs->pc)[7]
+			       );
+#endif
+			goto bad_area;
+		}
+	}
+
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+	info.si_code = SEGV_ACCERR;
+	write = 0;
+	switch (fault_code & (MMUFCR_xFC_PGINVAL|MMUFCR_xFC_TYPE)) {
+	default:	/* 3: write, present */
+	case MMUFCR_xFC_TYPE_WRITE:
+#ifdef TEST_VERIFY_AREA
+		if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_SR)
+			printk(KERN_DEBUG "WP fault at %08lx\n", regs->pc);
+#endif
+		/* write to absent page */
+	case MMUFCR_xFC_PGINVAL | MMUFCR_xFC_TYPE_WRITE:
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		write++;
+		break;
+
+		/* read from protected page */
+	case MMUFCR_xFC_TYPE_READ:
+		goto bad_area;
+
+		/* read from absent page present */
+	case MMUFCR_xFC_PGINVAL | MMUFCR_xFC_TYPE_READ:
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+		break;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, write);
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+	if (fault & VM_FAULT_MAJOR)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+
+	up_read(&mm->mmap_sem);
+	return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+	up_read(&mm->mmap_sem);
+	monitor_signal(regs);
+
+	/* User mode accesses just cause a SIGSEGV */
+	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) {
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		/* info.si_code has been set above */
+		info.si_addr = (void *)address;
+		force_sig_info(SIGSEGV, &info, tsk);
+		return;
+	}
+
+no_context:
+	monitor_signal(regs);
+	/* Are we prepared to handle this kernel fault?  */
+	if (fixup_exception(regs))
+		return;
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+	bust_spinlocks(1);
+
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT
+		       "Unable to handle kernel NULL pointer dereference");
+	else
+		printk(KERN_ALERT
+		       "Unable to handle kernel paging request");
+	printk(" at virtual address %08lx\n", address);
+	printk(" printing pc:\n");
+	printk(KERN_ALERT "%08lx\n", regs->pc);
+
+#ifdef CONFIG_GDBSTUB
+	gdbstub_intercept(
+		regs, fault_code & 0x00010000 ? EXCEP_IAERROR : EXCEP_DAERROR);
+#endif
+
+	page = PTBR;
+	page = ((unsigned long *) __va(page))[address >> 22];
+	printk(KERN_ALERT "*pde = %08lx\n", page);
+	if (page & 1) {
+		page &= PAGE_MASK;
+		address &= 0x003ff000;
+		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+		printk(KERN_ALERT "*pte = %08lx\n", page);
+	}
+
+	die("Oops", regs, fault_code);
+	do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	monitor_signal(regs);
+	printk(KERN_ALERT "VM: killing process %s\n", tsk->comm);
+	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
+		do_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	monitor_signal(regs);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (void *)address;
+	force_sig_info(SIGBUS, &info, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_SR)
+		goto no_context;
+	return;
+
+vmalloc_fault:
+	{
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
+		 */
+		int index = pgd_index(address);
+		pgd_t *pgd, *pgd_k;
+		pud_t *pud, *pud_k;
+		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;
+
+		pgd_k = init_mm.pgd + index;
+
+		if (!pgd_present(*pgd_k))
+			goto no_context;
+
+		pud_k = pud_offset(pgd_k, address);
+		if (!pud_present(*pud_k))
+			goto no_context;
+
+		pmd_k = pmd_offset(pud_k, address);
+		if (!pmd_present(*pmd_k))
+			goto no_context;
+
+		pgd = (pgd_t *) PTBR + index;
+		pud = pud_offset(pgd, address);
+		pmd = pmd_offset(pud, address);
+		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset_kernel(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto no_context;
+		return;
+	}
+}
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
new file mode 100644
index 000000000000..8c5d88c7b90a
--- /dev/null
+++ b/arch/mn10300/mm/init.c
@@ -0,0 +1,160 @@
+/* MN10300 Memory management initialisation
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/tlb.h>
+#include <asm/sections.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+unsigned long highstart_pfn, highend_pfn;
+
+/*
+ * set up paging
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = {0,};
+	pte_t *ppte;
+	int loop;
+
+	/* main kernel space -> RAM mapping is handled as 1:1 transparent by
+	 * the MMU */
+	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
+	memset(kernel_vmalloc_ptes, 0, sizeof(kernel_vmalloc_ptes));
+
+	/* load the VMALLOC area PTE table addresses into the kernel PGD */
+	ppte = kernel_vmalloc_ptes;
+	for (loop = VMALLOC_START / (PAGE_SIZE * PTRS_PER_PTE);
+	     loop < VMALLOC_END / (PAGE_SIZE * PTRS_PER_PTE);
+	     loop++
+	     ) {
+		set_pgd(swapper_pg_dir + loop, __pgd(__pa(ppte) | _PAGE_TABLE));
+		ppte += PAGE_SIZE / sizeof(pte_t);
+	}
+
+	/* declare the sizes of the RAM zones (only use the normal zone) */
+	zones_size[ZONE_NORMAL] =
+		(contig_page_data.bdata->node_low_pfn) -
+		(contig_page_data.bdata->node_boot_start >> PAGE_SHIFT);
+
+	/* pass the memory from the bootmem allocator to the main allocator */
+	free_area_init(zones_size);
+
+	__flush_tlb_all();
+}
+
+/*
+ * transfer all the memory from the bootmem allocator to the runtime allocator
+ */
+void __init mem_init(void)
+{
+	int codesize, reservedpages, datasize, initsize;
+	int tmp;
+
+	if (!mem_map)
+		BUG();
+
+#define START_PFN	(contig_page_data.bdata->node_boot_start >> PAGE_SHIFT)
+#define MAX_LOW_PFN	(contig_page_data.bdata->node_low_pfn)
+
+	max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN;
+	high_memory = (void *) __va(MAX_LOW_PFN * PAGE_SIZE);
+
+	/* clear the zero-page */
+	memset(empty_zero_page, 0, PAGE_SIZE);
+
+	/* this will put all low memory onto the freelists */
+	totalram_pages += free_all_bootmem();
+
+	reservedpages = 0;
+	for (tmp = 0; tmp < num_physpages; tmp++)
+		if (PageReserved(&mem_map[tmp]))
+			reservedpages++;
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_stext;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	printk(KERN_INFO
+	       "Memory: %luk/%luk available"
+	       " (%dk kernel code, %dk reserved, %dk data, %dk init,"
+	       " %ldk highmem)\n",
+	       (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
+	       max_mapnr << (PAGE_SHIFT - 10),
+	       codesize >> 10,
+	       reservedpages << (PAGE_SHIFT - 10),
+	       datasize >> 10,
+	       initsize >> 10,
+	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10))
+	       );
+}
+
+/*
+ *
+ */
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		memset((void *) addr, 0xcc, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+/*
+ * recycle memory containing stuff only required for initialisation
+ */
+void free_initmem(void)
+{
+	free_init_pages("unused kernel memory",
+			(unsigned long) &__init_begin,
+			(unsigned long) &__init_end);
+}
+
+/*
+ * dispose of the memory on which the initial ramdisk resided
+ */
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_init_pages("initrd memory", start, end);
+}
+#endif
diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c
new file mode 100644
index 000000000000..32aa89dc3848
--- /dev/null
+++ b/arch/mn10300/mm/misalignment.c
@@ -0,0 +1,661 @@
+/* MN10300 Misalignment fixup handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/cpu-regs.h>
+#include <asm/busctl-regs.h>
+#include <asm/fpu.h>
+#include <asm/gdb-stub.h>
+#include <asm/asm-offsets.h>
+
+#if 0
+#define kdebug(FMT, ...) printk(KERN_DEBUG FMT, ##__VA_ARGS__)
+#else
+#define kdebug(FMT, ...) do {} while (0)
+#endif
+
+static int misalignment_addr(unsigned long *registers, unsigned params,
+			     unsigned opcode, unsigned disp,
+			     void **_address, unsigned long **_postinc);
+
+static int misalignment_reg(unsigned long *registers, unsigned params,
+			    unsigned opcode, unsigned disp,
+			    unsigned long **_register);
+
+static inline unsigned int_log2(unsigned x)
+{
+	unsigned y;
+	asm("bsch %1,%0" : "=r"(y) : "r"(x), "0"(0));
+	return y;
+}
+#define log2(x) int_log2(x)
+
+static const unsigned Dreg_index[] = {
+	REG_D0 >> 2, REG_D1 >> 2, REG_D2 >> 2, REG_D3 >> 2
+};
+
+static const unsigned Areg_index[] = {
+	REG_A0 >> 2, REG_A1 >> 2, REG_A2 >> 2, REG_A3 >> 2
+};
+
+static const unsigned Rreg_index[] = {
+	REG_E0 >> 2, REG_E1 >> 2, REG_E2 >> 2, REG_E3 >> 2,
+	REG_E4 >> 2, REG_E5 >> 2, REG_E6 >> 2, REG_E7 >> 2,
+	REG_A0 >> 2, REG_A1 >> 2, REG_A2 >> 2, REG_A3 >> 2,
+	REG_D0 >> 2, REG_D1 >> 2, REG_D2 >> 2, REG_D3 >> 2
+};
+
+enum format_id {
+	FMT_S0,
+	FMT_S1,
+	FMT_S2,
+	FMT_S4,
+	FMT_D0,
+	FMT_D1,
+	FMT_D2,
+	FMT_D4,
+	FMT_D6,
+	FMT_D7,
+	FMT_D8,
+	FMT_D9,
+};
+
+struct {
+	u_int8_t opsz, dispsz;
+} format_tbl[16] = {
+	[FMT_S0]	= { 8,	0	},
+	[FMT_S1]	= { 8,	8	},
+	[FMT_S2]	= { 8,	16	},
+	[FMT_S4]	= { 8,	32	},
+	[FMT_D0]	= { 16,	0	},
+	[FMT_D1]	= { 16,	8	},
+	[FMT_D2]	= { 16,	16	},
+	[FMT_D4]	= { 16,	32	},
+	[FMT_D6]	= { 24,	0	},
+	[FMT_D7]	= { 24,	8	},
+	[FMT_D8]	= { 24,	24	},
+	[FMT_D9]	= { 24,	32	},
+};
+
+enum value_id {
+	DM0,		/* data reg in opcode in bits 0-1 */
+	DM1,		/* data reg in opcode in bits 2-3 */
+	DM2,		/* data reg in opcode in bits 4-5 */
+	AM0,		/* addr reg in opcode in bits 0-1 */
+	AM1,		/* addr reg in opcode in bits 2-3 */
+	AM2,		/* addr reg in opcode in bits 4-5 */
+	RM0,		/* reg in opcode in bits 0-3 */
+	RM1,		/* reg in opcode in bits 2-5 */
+	RM2,		/* reg in opcode in bits 4-7 */
+	RM4,		/* reg in opcode in bits 8-11 */
+	RM6,		/* reg in opcode in bits 12-15 */
+
+	RD0,		/* reg in displacement in bits 0-3 */
+	RD2,		/* reg in displacement in bits 4-7 */
+
+	SP,		/* stack pointer */
+
+	SD8,		/* 8-bit signed displacement */
+	SD16,		/* 16-bit signed displacement */
+	SD24,		/* 24-bit signed displacement */
+	SIMM4_2,	/* 4-bit signed displacement in opcode bits 4-7 */
+	SIMM8,		/* 8-bit signed immediate */
+	IMM24,		/* 24-bit unsigned immediate */
+	IMM32,		/* 32-bit unsigned immediate */
+	IMM32_HIGH8,	/* 32-bit unsigned immediate, high 8-bits in opcode */
+
+	DN0	= DM0,
+	DN1	= DM1,
+	DN2	= DM2,
+	AN0	= AM0,
+	AN1	= AM1,
+	AN2	= AM2,
+	RN0	= RM0,
+	RN1	= RM1,
+	RN2	= RM2,
+	RN4	= RM4,
+	RN6	= RM6,
+	DI	= DM1,
+	RI	= RM2,
+
+};
+
+struct mn10300_opcode {
+	const char	*name;
+	u_int32_t	opcode;
+	u_int32_t	opmask;
+	unsigned	exclusion;
+
+	enum format_id	format;
+
+	unsigned	cpu_mask;
+#define AM33	330
+
+	unsigned	params[2];
+#define MEM(ADDR)		(0x80000000 | (ADDR))
+#define MEM2(ADDR1, ADDR2)	(0x80000000 | (ADDR1) << 8 | (ADDR2))
+#define MEMINC(ADDR)		(0x81000000 | (ADDR))
+#define MEMINC2(ADDR, INC)	(0x81000000 | (ADDR) << 8 | (INC))
+};
+
+/* LIBOPCODES EXCERPT
+   Assemble Matsushita MN10300 instructions.
+   Copyright 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public Licence as published by
+   the Free Software Foundation; either version 2 of the Licence, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public Licence for more details.
+
+   You should have received a copy of the GNU General Public Licence
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+static const struct mn10300_opcode mn10300_opcodes[] = {
+{ "mov",	0x60,	     0xf0,	  0,    FMT_S0, 0,	{DM1, MEM(AN0)}},
+{ "mov",	0x70,	     0xf0,	  0,    FMT_S0, 0,	{MEM(AM0), DN1}},
+{ "mov",	0xf000,	     0xfff0,	  0,    FMT_D0, 0,	{MEM(AM0), AN1}},
+{ "mov",	0xf010,	     0xfff0,	  0,    FMT_D0, 0,	{AM1, MEM(AN0)}},
+{ "mov",	0xf300,	     0xffc0,	  0,    FMT_D0, 0,	{MEM2(DI, AM0), DN2}},
+{ "mov",	0xf340,	     0xffc0,	  0,    FMT_D0, 0,	{DM2, MEM2(DI, AN0)}},
+{ "mov",	0xf380,	     0xffc0,	  0,    FMT_D0, 0,	{MEM2(DI, AM0), AN2}},
+{ "mov",	0xf3c0,	     0xffc0,	  0,    FMT_D0, 0,	{AM2, MEM2(DI, AN0)}},
+{ "mov",	0xf80000,    0xfff000,    0,    FMT_D1, 0,	{MEM2(SD8, AM0), DN1}},
+{ "mov",	0xf81000,    0xfff000,    0,    FMT_D1, 0,	{DM1, MEM2(SD8, AN0)}},
+{ "mov",	0xf82000,    0xfff000,    0,    FMT_D1, 0,	{MEM2(SD8,AM0), AN1}},
+{ "mov",	0xf83000,    0xfff000,    0,    FMT_D1, 0,	{AM1, MEM2(SD8, AN0)}},
+{ "mov",	0xf8f000,    0xfffc00,    0,    FMT_D1, AM33,	{MEM2(SD8, AM0), SP}},
+{ "mov",	0xf8f400,    0xfffc00,    0,    FMT_D1, AM33,	{SP, MEM2(SD8, AN0)}},
+{ "mov",	0xf90a00,    0xffff00,    0,    FMT_D6, AM33,	{MEM(RM0), RN2}},
+{ "mov",	0xf91a00,    0xffff00,    0,    FMT_D6, AM33,	{RM2, MEM(RN0)}},
+{ "mov",	0xf96a00,    0xffff00,    0x12, FMT_D6, AM33,	{MEMINC(RM0), RN2}},
+{ "mov",	0xf97a00,    0xffff00,    0,	FMT_D6, AM33,	{RM2, MEMINC(RN0)}},
+{ "mov",	0xfa000000,  0xfff00000,  0,    FMT_D2, 0,	{MEM2(SD16, AM0), DN1}},
+{ "mov",	0xfa100000,  0xfff00000,  0,    FMT_D2, 0,	{DM1, MEM2(SD16, AN0)}},
+{ "mov",	0xfa200000,  0xfff00000,  0,    FMT_D2, 0,	{MEM2(SD16, AM0), AN1}},
+{ "mov",	0xfa300000,  0xfff00000,  0,    FMT_D2, 0,	{AM1, MEM2(SD16, AN0)}},
+{ "mov",	0xfb0a0000,  0xffff0000,  0,    FMT_D7, AM33,	{MEM2(SD8, RM0), RN2}},
+{ "mov",	0xfb1a0000,  0xffff0000,  0,    FMT_D7, AM33,	{RM2, MEM2(SD8, RN0)}},
+{ "mov",	0xfb6a0000,  0xffff0000,  0x22, FMT_D7, AM33,	{MEMINC2 (RM0, SIMM8), RN2}},
+{ "mov",	0xfb7a0000,  0xffff0000,  0,	FMT_D7, AM33,	{RM2, MEMINC2 (RN0, SIMM8)}},
+{ "mov",	0xfb8e0000,  0xffff000f,  0,    FMT_D7, AM33,	{MEM2(RI, RM0), RD2}},
+{ "mov",	0xfb9e0000,  0xffff000f,  0,    FMT_D7, AM33,	{RD2, MEM2(RI, RN0)}},
+{ "mov",	0xfc000000,  0xfff00000,  0,    FMT_D4, 0,	{MEM2(IMM32,AM0), DN1}},
+{ "mov",	0xfc100000,  0xfff00000,  0,    FMT_D4, 0,	{DM1, MEM2(IMM32,AN0)}},
+{ "mov",	0xfc200000,  0xfff00000,  0,    FMT_D4, 0,	{MEM2(IMM32,AM0), AN1}},
+{ "mov",	0xfc300000,  0xfff00000,  0,    FMT_D4, 0,	{AM1, MEM2(IMM32,AN0)}},
+{ "mov",	0xfd0a0000,  0xffff0000,  0,    FMT_D8, AM33,	{MEM2(SD24, RM0), RN2}},
+{ "mov",	0xfd1a0000,  0xffff0000,  0,    FMT_D8, AM33,	{RM2, MEM2(SD24, RN0)}},
+{ "mov",	0xfd6a0000,  0xffff0000,  0x22, FMT_D8, AM33,	{MEMINC2 (RM0, IMM24), RN2}},
+{ "mov",	0xfd7a0000,  0xffff0000,  0,	FMT_D8, AM33,	{RM2, MEMINC2 (RN0, IMM24)}},
+{ "mov",	0xfe0a0000,  0xffff0000,  0,    FMT_D9, AM33,	{MEM2(IMM32_HIGH8,RM0), RN2}},
+{ "mov",	0xfe1a0000,  0xffff0000,  0,    FMT_D9, AM33,	{RM2, MEM2(IMM32_HIGH8, RN0)}},
+{ "mov",	0xfe6a0000,  0xffff0000,  0x22, FMT_D9, AM33,	{MEMINC2 (RM0, IMM32_HIGH8), RN2}},
+{ "mov",	0xfe7a0000,  0xffff0000,  0,	FMT_D9, AM33,	{RN2, MEMINC2 (RM0, IMM32_HIGH8)}},
+
+{ "movhu",	0xf060,	     0xfff0,	  0,    FMT_D0, 0,	{MEM(AM0), DN1}},
+{ "movhu",	0xf070,	     0xfff0,	  0,    FMT_D0, 0,	{DM1, MEM(AN0)}},
+{ "movhu",	0xf480,	     0xffc0,	  0,    FMT_D0, 0,	{MEM2(DI, AM0), DN2}},
+{ "movhu",	0xf4c0,	     0xffc0,	  0,    FMT_D0, 0,	{DM2, MEM2(DI, AN0)}},
+{ "movhu",	0xf86000,    0xfff000,    0,    FMT_D1, 0,	{MEM2(SD8, AM0), DN1}},
+{ "movhu",	0xf87000,    0xfff000,    0,    FMT_D1, 0,	{DM1, MEM2(SD8, AN0)}},
+{ "movhu",	0xf94a00,    0xffff00,    0,    FMT_D6, AM33,	{MEM(RM0), RN2}},
+{ "movhu",	0xf95a00,    0xffff00,    0,    FMT_D6, AM33,	{RM2, MEM(RN0)}},
+{ "movhu",	0xf9ea00,    0xffff00,    0x12, FMT_D6, AM33,	{MEMINC(RM0), RN2}},
+{ "movhu",	0xf9fa00,    0xffff00,    0,	FMT_D6, AM33,	{RM2, MEMINC(RN0)}},
+{ "movhu",	0xfa600000,  0xfff00000,  0,    FMT_D2, 0,	{MEM2(SD16, AM0), DN1}},
+{ "movhu",	0xfa700000,  0xfff00000,  0,    FMT_D2, 0,	{DM1, MEM2(SD16, AN0)}},
+{ "movhu",	0xfb4a0000,  0xffff0000,  0,    FMT_D7, AM33,	{MEM2(SD8, RM0), RN2}},
+{ "movhu",	0xfb5a0000,  0xffff0000,  0,    FMT_D7, AM33,	{RM2, MEM2(SD8, RN0)}},
+{ "movhu",	0xfbce0000,  0xffff000f,  0,    FMT_D7, AM33,	{MEM2(RI, RM0), RD2}},
+{ "movhu",	0xfbde0000,  0xffff000f,  0,    FMT_D7, AM33,	{RD2, MEM2(RI, RN0)}},
+{ "movhu",	0xfbea0000,  0xffff0000,  0x22, FMT_D7, AM33,	{MEMINC2 (RM0, SIMM8), RN2}},
+{ "movhu",	0xfbfa0000,  0xffff0000,  0,	FMT_D7, AM33,	{RM2, MEMINC2 (RN0, SIMM8)}},
+{ "movhu",	0xfc600000,  0xfff00000,  0,    FMT_D4, 0,	{MEM2(IMM32,AM0), DN1}},
+{ "movhu",	0xfc700000,  0xfff00000,  0,    FMT_D4, 0,	{DM1, MEM2(IMM32,AN0)}},
+{ "movhu",	0xfd4a0000,  0xffff0000,  0,    FMT_D8, AM33,	{MEM2(SD24, RM0), RN2}},
+{ "movhu",	0xfd5a0000,  0xffff0000,  0,    FMT_D8, AM33,	{RM2, MEM2(SD24, RN0)}},
+{ "movhu",	0xfdea0000,  0xffff0000,  0x22, FMT_D8, AM33,	{MEMINC2 (RM0, IMM24), RN2}},
+{ "movhu",	0xfdfa0000,  0xffff0000,  0,	FMT_D8, AM33,	{RM2, MEMINC2 (RN0, IMM24)}},
+{ "movhu",	0xfe4a0000,  0xffff0000,  0,    FMT_D9, AM33,	{MEM2(IMM32_HIGH8,RM0), RN2}},
+{ "movhu",	0xfe5a0000,  0xffff0000,  0,    FMT_D9, AM33,	{RM2, MEM2(IMM32_HIGH8, RN0)}},
+{ "movhu",	0xfeea0000,  0xffff0000,  0x22, FMT_D9, AM33,	{MEMINC2 (RM0, IMM32_HIGH8), RN2}},
+{ "movhu",	0xfefa0000,  0xffff0000,  0,	FMT_D9, AM33,	{RN2, MEMINC2 (RM0, IMM32_HIGH8)}},
+{ 0, 0, 0, 0, 0, 0, {0}},
+};
+
+/*
+ * fix up misalignment problems where possible
+ */
+asmlinkage void misalignment(struct pt_regs *regs, enum exception_code code)
+{
+	const struct exception_table_entry *fixup;
+	const struct mn10300_opcode *pop;
+	unsigned long *registers = (unsigned long *) regs;
+	unsigned long data, *store, *postinc;
+	mm_segment_t seg;
+	siginfo_t info;
+	uint32_t opcode, disp, noc, xo, xm;
+	uint8_t *pc, byte;
+	void *address;
+	unsigned tmp, npop;
+
+	kdebug("MISALIGN at %lx\n", regs->pc);
+
+	if (in_interrupt())
+		die("Misalignment trap in interrupt context", regs, code);
+
+	if (regs->epsw & EPSW_IE)
+		asm volatile("or %0,epsw" : : "i"(EPSW_IE));
+
+	seg = get_fs();
+	set_fs(KERNEL_DS);
+
+	fixup = search_exception_tables(regs->pc);
+
+	/* first thing to do is to match the opcode */
+	pc = (u_int8_t *) regs->pc;
+
+	if (__get_user(byte, pc) != 0)
+		goto fetch_error;
+	opcode = byte;
+	noc = 8;
+
+	for (pop = mn10300_opcodes; pop->name; pop++) {
+		npop = log2(pop->opcode | pop->opmask);
+		if (npop <= 0 || npop > 31)
+			continue;
+		npop = (npop + 8) & ~7;
+
+	got_more_bits:
+		if (npop == noc) {
+			if ((opcode & pop->opmask) == pop->opcode)
+				goto found_opcode;
+		} else if (npop > noc) {
+			xo = pop->opcode >> (npop - noc);
+			xm = pop->opmask >> (npop - noc);
+
+			if ((opcode & xm) != xo)
+				continue;
+
+			/* we've got a partial match (an exact match on the
+			 * first N bytes), so we need to get some more data */
+			pc++;
+			if (__get_user(byte, pc) != 0)
+				goto fetch_error;
+			opcode = opcode << 8 | byte;
+			noc += 8;
+			goto got_more_bits;
+		} else {
+			/* there's already been a partial match as long as the
+			 * complete match we're now considering, so this one
+			 * should't match */
+			continue;
+		}
+	}
+
+	/* didn't manage to find a fixup */
+	if (!user_mode(regs))
+		printk(KERN_CRIT "MISALIGN: %lx: unsupported instruction %x\n",
+		       regs->pc, opcode);
+
+failed:
+	set_fs(seg);
+	if (die_if_no_fixup("misalignment error", regs, code))
+		return;
+
+	info.si_signo	= SIGBUS;
+	info.si_errno	= 0;
+	info.si_code	= BUS_ADRALN;
+	info.si_addr	= (void *) regs->pc;
+	force_sig_info(SIGBUS, &info, current);
+	return;
+
+	/* error reading opcodes */
+fetch_error:
+	if (!user_mode(regs))
+		printk(KERN_CRIT
+		       "MISALIGN: %p: fault whilst reading instruction data\n",
+		       pc);
+	goto failed;
+
+bad_addr_mode:
+	if (!user_mode(regs))
+		printk(KERN_CRIT
+		       "MISALIGN: %lx: unsupported addressing mode %x\n",
+		       regs->pc, opcode);
+	goto failed;
+
+bad_reg_mode:
+	if (!user_mode(regs))
+		printk(KERN_CRIT
+		       "MISALIGN: %lx: unsupported register mode %x\n",
+		       regs->pc, opcode);
+	goto failed;
+
+unsupported_instruction:
+	if (!user_mode(regs))
+		printk(KERN_CRIT
+		       "MISALIGN: %lx: unsupported instruction %x (%s)\n",
+		       regs->pc, opcode, pop->name);
+	goto failed;
+
+transfer_failed:
+	set_fs(seg);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return;
+	}
+	if (die_if_no_fixup("misalignment fixup", regs, code))
+		return;
+
+	info.si_signo	= SIGSEGV;
+	info.si_errno	= 0;
+	info.si_code	= 0;
+	info.si_addr	= (void *) regs->pc;
+	force_sig_info(SIGSEGV, &info, current);
+	return;
+
+	/* we matched the opcode */
+found_opcode:
+	kdebug("MISALIGN: %lx: %x==%x { %x, %x }\n",
+	       regs->pc, opcode, pop->opcode, pop->params[0], pop->params[1]);
+
+	tmp = format_tbl[pop->format].opsz;
+	if (tmp > noc)
+		BUG(); /* match was less complete than it ought to have been */
+
+	if (tmp < noc) {
+		tmp = noc - tmp;
+		opcode >>= tmp;
+		pc -= tmp >> 3;
+	}
+
+	/* grab the extra displacement (note it's LSB first) */
+	disp = 0;
+	tmp = format_tbl[pop->format].dispsz >> 3;
+	while (tmp > 0) {
+		tmp--;
+		disp <<= 8;
+
+		pc++;
+		if (__get_user(byte, pc) != 0)
+			goto fetch_error;
+		disp |= byte;
+	}
+
+	set_fs(KERNEL_XDS);
+	if (fixup || regs->epsw & EPSW_nSL)
+		set_fs(seg);
+
+	tmp = (pop->params[0] ^ pop->params[1]) & 0x80000000;
+	if (!tmp) {
+		if (!user_mode(regs))
+			printk(KERN_CRIT
+			       "MISALIGN: %lx:"
+			       " insn not move to/from memory %x\n",
+			       regs->pc, opcode);
+		goto failed;
+	}
+
+	if (pop->params[0] & 0x80000000) {
+		/* move memory to register */
+		if (!misalignment_addr(registers, pop->params[0], opcode, disp,
+				       &address, &postinc))
+			goto bad_addr_mode;
+
+		if (!misalignment_reg(registers, pop->params[1], opcode, disp,
+				      &store))
+			goto bad_reg_mode;
+
+		if (strcmp(pop->name, "mov") == 0) {
+			kdebug("FIXUP: mov (%p),DARn\n", address);
+			if (copy_from_user(&data, (void *) address, 4) != 0)
+				goto transfer_failed;
+			if (pop->params[0] & 0x1000000)
+				*postinc += 4;
+		} else if (strcmp(pop->name, "movhu") == 0) {
+			kdebug("FIXUP: movhu (%p),DARn\n", address);
+			data = 0;
+			if (copy_from_user(&data, (void *) address, 2) != 0)
+				goto transfer_failed;
+			if (pop->params[0] & 0x1000000)
+				*postinc += 2;
+		} else {
+			goto unsupported_instruction;
+		}
+
+		*store = data;
+	} else {
+		/* move register to memory */
+		if (!misalignment_reg(registers, pop->params[0], opcode, disp,
+				      &store))
+			goto bad_reg_mode;
+
+		if (!misalignment_addr(registers, pop->params[1], opcode, disp,
+				       &address, &postinc))
+			goto bad_addr_mode;
+
+		data = *store;
+
+		if (strcmp(pop->name, "mov") == 0) {
+			kdebug("FIXUP: mov %lx,(%p)\n", data, address);
+			if (copy_to_user((void *) address, &data, 4) != 0)
+				goto transfer_failed;
+			if (pop->params[1] & 0x1000000)
+				*postinc += 4;
+		} else if (strcmp(pop->name, "movhu") == 0) {
+			kdebug("FIXUP: movhu %hx,(%p)\n",
+			       (uint16_t) data, address);
+			if (copy_to_user((void *) address, &data, 2) != 0)
+				goto transfer_failed;
+			if (pop->params[1] & 0x1000000)
+				*postinc += 2;
+		} else {
+			goto unsupported_instruction;
+		}
+	}
+
+	tmp = format_tbl[pop->format].opsz + format_tbl[pop->format].dispsz;
+	regs->pc += tmp >> 3;
+
+	set_fs(seg);
+	return;
+}
+
+/*
+ * determine the address that was being accessed
+ */
+static int misalignment_addr(unsigned long *registers, unsigned params,
+			     unsigned opcode, unsigned disp,
+			     void **_address, unsigned long **_postinc)
+{
+	unsigned long *postinc = NULL, address = 0, tmp;
+
+	params &= 0x7fffffff;
+
+	do {
+		switch (params & 0xff) {
+		case DM0:
+			postinc = &registers[Dreg_index[opcode & 0x03]];
+			address += *postinc;
+			break;
+		case DM1:
+			postinc = &registers[Dreg_index[opcode >> 2 & 0x0c]];
+			address += *postinc;
+			break;
+		case DM2:
+			postinc = &registers[Dreg_index[opcode >> 4 & 0x30]];
+			address += *postinc;
+			break;
+		case AM0:
+			postinc = &registers[Areg_index[opcode & 0x03]];
+			address += *postinc;
+			break;
+		case AM1:
+			postinc = &registers[Areg_index[opcode >> 2 & 0x0c]];
+			address += *postinc;
+			break;
+		case AM2:
+			postinc = &registers[Areg_index[opcode >> 4 & 0x30]];
+			address += *postinc;
+			break;
+		case RM0:
+			postinc = &registers[Rreg_index[opcode & 0x0f]];
+			address += *postinc;
+			break;
+		case RM1:
+			postinc = &registers[Rreg_index[opcode >> 2 & 0x0f]];
+			address += *postinc;
+			break;
+		case RM2:
+			postinc = &registers[Rreg_index[opcode >> 4 & 0x0f]];
+			address += *postinc;
+			break;
+		case RM4:
+			postinc = &registers[Rreg_index[opcode >> 8 & 0x0f]];
+			address += *postinc;
+			break;
+		case RM6:
+			postinc = &registers[Rreg_index[opcode >> 12 & 0x0f]];
+			address += *postinc;
+			break;
+		case RD0:
+			postinc = &registers[Rreg_index[disp & 0x0f]];
+			address += *postinc;
+			break;
+		case RD2:
+			postinc = &registers[Rreg_index[disp >> 4 & 0x0f]];
+			address += *postinc;
+			break;
+
+		case SD8:
+		case SIMM8:
+			address += (int32_t) (int8_t) (disp & 0xff);
+			break;
+		case SD16:
+			address += (int32_t) (int16_t) (disp & 0xffff);
+			break;
+		case SD24:
+			tmp = disp << 8;
+			asm("asr 8,%0" : "=r"(tmp) : "0"(tmp));
+			address += tmp;
+			break;
+		case SIMM4_2:
+			tmp = opcode >> 4 & 0x0f;
+			tmp <<= 28;
+			asm("asr 28,%0" : "=r"(tmp) : "0"(tmp));
+			address += tmp;
+			break;
+		case IMM24:
+			address += disp & 0x00ffffff;
+			break;
+		case IMM32:
+		case IMM32_HIGH8:
+			address += disp;
+			break;
+		default:
+			return 0;
+		}
+	} while ((params >>= 8));
+
+	*_address = (void *) address;
+	*_postinc = postinc;
+	return 1;
+}
+
+/*
+ * determine the register that is acting as source/dest
+ */
+static int misalignment_reg(unsigned long *registers, unsigned params,
+			    unsigned opcode, unsigned disp,
+			    unsigned long **_register)
+{
+	params &= 0x7fffffff;
+
+	if (params & 0xffffff00)
+		return 0;
+
+	switch (params & 0xff) {
+	case DM0:
+		*_register = &registers[Dreg_index[opcode & 0x03]];
+		break;
+	case DM1:
+		*_register = &registers[Dreg_index[opcode >> 2 & 0x03]];
+		break;
+	case DM2:
+		*_register = &registers[Dreg_index[opcode >> 4 & 0x03]];
+		break;
+	case AM0:
+		*_register = &registers[Areg_index[opcode & 0x03]];
+		break;
+	case AM1:
+		*_register = &registers[Areg_index[opcode >> 2 & 0x03]];
+		break;
+	case AM2:
+		*_register = &registers[Areg_index[opcode >> 4 & 0x03]];
+		break;
+	case RM0:
+		*_register = &registers[Rreg_index[opcode & 0x0f]];
+		break;
+	case RM1:
+		*_register = &registers[Rreg_index[opcode >> 2 & 0x0f]];
+		break;
+	case RM2:
+		*_register = &registers[Rreg_index[opcode >> 4 & 0x0f]];
+		break;
+	case RM4:
+		*_register = &registers[Rreg_index[opcode >> 8 & 0x0f]];
+		break;
+	case RM6:
+		*_register = &registers[Rreg_index[opcode >> 12 & 0x0f]];
+		break;
+	case RD0:
+		*_register = &registers[Rreg_index[disp & 0x0f]];
+		break;
+	case RD2:
+		*_register = &registers[Rreg_index[disp >> 4 & 0x0f]];
+		break;
+	case SP:
+		*_register = &registers[REG_SP >> 2];
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 1;
+}
diff --git a/arch/mn10300/mm/mmu-context.c b/arch/mn10300/mm/mmu-context.c
new file mode 100644
index 000000000000..31c9d27a75ae
--- /dev/null
+++ b/arch/mn10300/mm/mmu-context.c
@@ -0,0 +1,80 @@
+/* MN10300 MMU context allocation and management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+
+/*
+ * list of the MMU contexts last allocated on each CPU
+ */
+unsigned long mmu_context_cache[NR_CPUS] = {
+	[0 ... NR_CPUS - 1] = MMU_CONTEXT_FIRST_VERSION * 2 - 1,
+};
+
+/*
+ * flush the specified TLB entry
+ */
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long pteu, cnx, flags;
+
+	addr &= PAGE_MASK;
+
+	/* make sure the context doesn't migrate and defend against
+	 * interference from vmalloc'd regions */
+	local_irq_save(flags);
+
+	cnx = mm_context(vma->vm_mm);
+
+	if (cnx != MMU_NO_CONTEXT) {
+		pteu = addr | (cnx & 0x000000ffUL);
+		IPTEU = pteu;
+		DPTEU = pteu;
+		if (IPTEL & xPTEL_V)
+			IPTEL = 0;
+		if (DPTEL & xPTEL_V)
+			DPTEL = 0;
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * preemptively set a TLB entry
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+	unsigned long pteu, ptel, cnx, flags;
+
+	addr &= PAGE_MASK;
+	ptel = pte_val(pte) & ~(xPTEL_UNUSED1 | xPTEL_UNUSED2);
+
+	/* make sure the context doesn't migrate and defend against
+	 * interference from vmalloc'd regions */
+	local_irq_save(flags);
+
+	cnx = mm_context(vma->vm_mm);
+
+	if (cnx != MMU_NO_CONTEXT) {
+		pteu = addr | (cnx & 0x000000ffUL);
+		if (!(pte_val(pte) & _PAGE_NX)) {
+			IPTEU = pteu;
+			if (IPTEL & xPTEL_V)
+				IPTEL = ptel;
+		}
+		DPTEU = pteu;
+		if (DPTEL & xPTEL_V)
+			DPTEL = ptel;
+	}
+
+	local_irq_restore(flags);
+}
diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c
new file mode 100644
index 000000000000..a477038752ba
--- /dev/null
+++ b/arch/mn10300/mm/pgtable.c
@@ -0,0 +1,197 @@
+/* MN10300 Page table management
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+#include <linux/quicklist.h>
+
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+void show_mem(void)
+{
+	unsigned long i;
+	int free = 0, total = 0, reserved = 0, shared = 0;
+
+	int cached = 0;
+	printk(KERN_INFO "Mem-info:\n");
+	show_free_areas();
+	i = max_mapnr;
+	while (i-- > 0) {
+		total++;
+		if (PageReserved(mem_map + i))
+			reserved++;
+		else if (PageSwapCache(mem_map + i))
+			cached++;
+		else if (!page_count(mem_map + i))
+			free++;
+		else
+			shared += page_count(mem_map + i) - 1;
+	}
+	printk(KERN_INFO "%d pages of RAM\n", total);
+	printk(KERN_INFO "%d free pages\n", free);
+	printk(KERN_INFO "%d reserved pages\n", reserved);
+	printk(KERN_INFO "%d pages shared\n", shared);
+	printk(KERN_INFO "%d pages swap cached\n", cached);
+}
+
+/*
+ * Associate a large virtual page frame with a given physical page frame
+ * and protection flags for that frame. pfn is for the base of the page,
+ * vaddr is what the page gets mapped to - both must be properly aligned.
+ * The pmd must already be instantiated. Assumes PAE mode.
+ */
+void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
+		printk(KERN_ERR "set_pmd_pfn: vaddr misaligned\n");
+		return; /* BUG(); */
+	}
+	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
+		printk(KERN_ERR "set_pmd_pfn: pfn misaligned\n");
+		return; /* BUG(); */
+	}
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	if (pgd_none(*pgd)) {
+		printk(KERN_ERR "set_pmd_pfn: pgd_none\n");
+		return; /* BUG(); */
+	}
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	set_pmd(pmd, pfn_pmd(pfn, flags));
+	/*
+	 * It's enough to flush this one mapping.
+	 * (PGE mappings get flushed as well)
+	 */
+	__flush_tlb_one(vaddr);
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	if (pte)
+		clear_page(pte);
+	return pte;
+}
+
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *pte;
+
+#ifdef CONFIG_HIGHPTE
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+#else
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+#endif
+	if (pte)
+		clear_highpage(pte);
+	return pte;
+}
+
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * If the locking proves to be non-performant, a ticketing scheme with
+ * checks at dup_mmap(), exec(), and other mmlist addition points
+ * could be used. The locking scheme was chosen on the basis of
+ * manfred's recommendations and having no core impact whatsoever.
+ * -- wli
+ */
+DEFINE_SPINLOCK(pgd_lock);
+struct page *pgd_list;
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+	page->index = (unsigned long) pgd_list;
+	if (pgd_list)
+		set_page_private(pgd_list, (unsigned long) &page->index);
+	pgd_list = page;
+	set_page_private(page, (unsigned long) &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *next, **pprev, *page = virt_to_page(pgd);
+	next = (struct page *) page->index;
+	pprev = (struct page **) page_private(page);
+	*pprev = next;
+	if (next)
+		set_page_private(next, (unsigned long) pprev);
+}
+
+void pgd_ctor(void *pgd)
+{
+	unsigned long flags;
+
+	if (PTRS_PER_PMD == 1)
+		spin_lock_irqsave(&pgd_lock, flags);
+
+	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+			swapper_pg_dir + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	pgd_list_add(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+	memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+}
+
+/* never called when PTRS_PER_PMD > 1 */
+void pgd_dtor(void *pgd)
+{
+	unsigned long flags; /* can be called from interrupt context */
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	quicklist_free(0, pgd_dtor, pgd);
+}
+
+void __init pgtable_cache_init(void)
+{
+}
+
+void check_pgt_cache(void)
+{
+	quicklist_trim(0, pgd_dtor, 25, 16);
+}
diff --git a/arch/mn10300/mm/tlb-mn10300.S b/arch/mn10300/mm/tlb-mn10300.S
new file mode 100644
index 000000000000..789208094e98
--- /dev/null
+++ b/arch/mn10300/mm/tlb-mn10300.S
@@ -0,0 +1,207 @@
+###############################################################################
+#
+# TLB loading functions
+#
+# Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+# Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+# Modified by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public Licence
+# as published by the Free Software Foundation; either version
+# 2 of the Licence, or (at your option) any later version.
+#
+###############################################################################
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/smp.h>
+#include <asm/intctl-regs.h>
+#include <asm/frame.inc>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+###############################################################################
+#
+# Instruction TLB Miss handler entry point
+#
+###############################################################################
+	.type	itlb_miss,@function
+ENTRY(itlb_miss)
+	and	~EPSW_NMID,epsw
+#ifdef CONFIG_GDBSTUB
+	movm	[d2,d3,a2],(sp)
+#else
+	or	EPSW_nAR,epsw		# switch D0-D3 & A0-A3 to the alternate
+					# register bank
+	nop
+	nop
+	nop
+#endif
+
+	mov	(IPTEU),d3
+	mov	(PTBR),a2
+	mov	d3,d2
+	and	0xffc00000,d2
+	lsr	20,d2
+	mov	(a2,d2),a2		# PTD *ptd = PGD[addr 31..22]
+	btst	_PAGE_VALID,a2
+	beq	itlb_miss_fault		# jump if doesn't point anywhere
+
+	and	~(PAGE_SIZE-1),a2
+	mov	d3,d2
+	and	0x003ff000,d2
+	lsr	10,d2
+	add	d2,a2
+	mov	(a2),d2			# get pte from PTD[addr 21..12]
+	btst	_PAGE_VALID,d2
+	beq	itlb_miss_fault		# jump if doesn't point to a page
+					# (might be a swap id)
+	bset	_PAGE_ACCESSED,(0,a2)
+	and	~(xPTEL_UNUSED1|xPTEL_UNUSED2),d2
+itlb_miss_set:
+	mov	d2,(IPTEL)		# change the TLB
+#ifdef CONFIG_GDBSTUB
+	movm	(sp),[d2,d3,a2]
+#endif
+	rti
+
+itlb_miss_fault:
+	mov	_PAGE_VALID,d2		# force address error handler to be
+					# invoked
+	bra	itlb_miss_set
+
+	.size	itlb_miss, . - itlb_miss
+
+###############################################################################
+#
+# Data TLB Miss handler entry point
+#
+###############################################################################
+	.type	dtlb_miss,@function
+ENTRY(dtlb_miss)
+	and	~EPSW_NMID,epsw
+#ifdef CONFIG_GDBSTUB
+	movm	[d2,d3,a2],(sp)
+#else
+	or	EPSW_nAR,epsw		# switch D0-D3 & A0-A3 to the alternate
+					# register bank
+	nop
+	nop
+	nop
+#endif
+
+	mov	(DPTEU),d3
+	mov	(PTBR),a2
+	mov	d3,d2
+	and	0xffc00000,d2
+	lsr	20,d2
+	mov	(a2,d2),a2		# PTD *ptd = PGD[addr 31..22]
+	btst	_PAGE_VALID,a2
+	beq	dtlb_miss_fault		# jump if doesn't point anywhere
+
+	and	~(PAGE_SIZE-1),a2
+	mov	d3,d2
+	and	0x003ff000,d2
+	lsr	10,d2
+	add	d2,a2
+	mov	(a2),d2			# get pte from PTD[addr 21..12]
+	btst	_PAGE_VALID,d2
+	beq	dtlb_miss_fault		# jump if doesn't point to a page
+					# (might be a swap id)
+	bset	_PAGE_ACCESSED,(0,a2)
+	and	~(xPTEL_UNUSED1|xPTEL_UNUSED2),d2
+dtlb_miss_set:
+	mov	d2,(DPTEL)		# change the TLB
+#ifdef CONFIG_GDBSTUB
+	movm	(sp),[d2,d3,a2]
+#endif
+	rti
+
+dtlb_miss_fault:
+	mov	_PAGE_VALID,d2		# force address error handler to be
+					# invoked
+	bra	dtlb_miss_set
+	.size	dtlb_miss, . - dtlb_miss
+
+###############################################################################
+#
+# Instruction TLB Address Error handler entry point
+#
+###############################################################################
+	.type	itlb_aerror,@function
+ENTRY(itlb_aerror)
+	and	~EPSW_NMID,epsw
+	add	-4,sp
+	SAVE_ALL
+	add	-4,sp				# need to pass three params
+
+	# calculate the fault code
+	movhu	(MMUFCR_IFC),d1
+	or	0x00010000,d1			# it's an instruction fetch
+
+	# determine the page address
+	mov	(IPTEU),a2
+	mov	a2,d0
+	and	PAGE_MASK,d0
+	mov	d0,(12,sp)
+
+	clr	d0
+	mov	d0,(IPTEL)
+
+	and	~EPSW_NMID,epsw
+	or	EPSW_IE,epsw
+	mov	fp,d0
+	call	do_page_fault[],0		# do_page_fault(regs,code,addr
+
+	jmp	ret_from_exception
+	.size	itlb_aerror, . - itlb_aerror
+
+###############################################################################
+#
+# Data TLB Address Error handler entry point
+#
+###############################################################################
+	.type	dtlb_aerror,@function
+ENTRY(dtlb_aerror)
+	and	~EPSW_NMID,epsw
+	add	-4,sp
+	mov	d1,(sp)
+
+	movhu	(MMUFCR_DFC),d1			# is it the initial valid write
+						# to this page?
+	and	MMUFCR_xFC_INITWR,d1
+ 	beq	dtlb_pagefault			# jump if not
+
+	mov	(DPTEL),d1			# set the dirty bit
+						# (don't replace with BSET!)
+	or	_PAGE_DIRTY,d1
+	mov	d1,(DPTEL)
+	mov	(sp),d1
+	add	4,sp
+ 	rti
+
+	ALIGN
+dtlb_pagefault:
+	mov	(sp),d1
+	SAVE_ALL
+	add	-4,sp				# need to pass three params
+
+	# calculate the fault code
+	movhu	(MMUFCR_DFC),d1
+
+	# determine the page address
+	mov	(DPTEU),a2
+	mov	a2,d0
+	and	PAGE_MASK,d0
+	mov	d0,(12,sp)
+
+	clr	d0
+	mov	d0,(DPTEL)
+
+	and	~EPSW_NMID,epsw
+	or	EPSW_IE,epsw
+	mov	fp,d0
+	call	do_page_fault[],0		# do_page_fault(regs,code,addr
+
+	jmp	ret_from_exception
+	.size	dtlb_aerror, . - dtlb_aerror
diff --git a/arch/mn10300/oprofile/Kconfig b/arch/mn10300/oprofile/Kconfig
new file mode 100644
index 000000000000..19d37730b664
--- /dev/null
+++ b/arch/mn10300/oprofile/Kconfig
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, include the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff --git a/arch/mn10300/oprofile/Makefile b/arch/mn10300/oprofile/Makefile
new file mode 100644
index 000000000000..918dbe60ebb6
--- /dev/null
+++ b/arch/mn10300/oprofile/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile for the MN10300-specific profiling code
+#
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o \
+		timer_int.o )
+
+oprofile-y				:= $(DRIVER_OBJS) op_model_null.o
+
diff --git a/arch/mn10300/oprofile/op_model_null.c b/arch/mn10300/oprofile/op_model_null.c
new file mode 100644
index 000000000000..cd4ab374bc4f
--- /dev/null
+++ b/arch/mn10300/oprofile/op_model_null.c
@@ -0,0 +1,22 @@
+/* Null profiling driver
+ *
+ * Copyright (C) 2003  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * Licence.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	return -ENODEV;
+}
+
+void oprofile_arch_exit(void)
+{
+}
+
diff --git a/arch/mn10300/proc-mn103e010/Makefile b/arch/mn10300/proc-mn103e010/Makefile
new file mode 100644
index 000000000000..ac2c9784cd21
--- /dev/null
+++ b/arch/mn10300/proc-mn103e010/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the MN103E010 processor chip specific code
+#
+obj-y   := proc-init.o
+
diff --git a/arch/mn10300/proc-mn103e010/proc-init.c b/arch/mn10300/proc-mn103e010/proc-init.c
new file mode 100644
index 000000000000..9a482efafa82
--- /dev/null
+++ b/arch/mn10300/proc-mn103e010/proc-init.c
@@ -0,0 +1,75 @@
+/* MN103E010 Processor initialisation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <asm/rtc.h>
+
+/*
+ * initialise the on-silicon processor peripherals
+ */
+asmlinkage void __init processor_init(void)
+{
+	int loop;
+
+	/* set up the exception table first */
+	for (loop = 0x000; loop < 0x400; loop += 8)
+		__set_intr_stub(loop, __common_exception);
+
+	__set_intr_stub(EXCEP_ITLBMISS,		itlb_miss);
+	__set_intr_stub(EXCEP_DTLBMISS,		dtlb_miss);
+	__set_intr_stub(EXCEP_IAERROR,		itlb_aerror);
+	__set_intr_stub(EXCEP_DAERROR,		dtlb_aerror);
+	__set_intr_stub(EXCEP_BUSERROR,		raw_bus_error);
+	__set_intr_stub(EXCEP_DOUBLE_FAULT,	double_fault);
+	__set_intr_stub(EXCEP_SYSCALL0,		system_call);
+
+	__set_intr_stub(EXCEP_NMI,		nmi_handler);
+	__set_intr_stub(EXCEP_WDT,		nmi_handler);
+	__set_intr_stub(EXCEP_IRQ_LEVEL0,	irq_handler);
+	__set_intr_stub(EXCEP_IRQ_LEVEL1,	irq_handler);
+	__set_intr_stub(EXCEP_IRQ_LEVEL2,	irq_handler);
+	__set_intr_stub(EXCEP_IRQ_LEVEL3,	irq_handler);
+	__set_intr_stub(EXCEP_IRQ_LEVEL4,	irq_handler);
+	__set_intr_stub(EXCEP_IRQ_LEVEL5,	irq_handler);
+	__set_intr_stub(EXCEP_IRQ_LEVEL6,	irq_handler);
+
+	IVAR0 = EXCEP_IRQ_LEVEL0;
+	IVAR1 = EXCEP_IRQ_LEVEL1;
+	IVAR2 = EXCEP_IRQ_LEVEL2;
+	IVAR3 = EXCEP_IRQ_LEVEL3;
+	IVAR4 = EXCEP_IRQ_LEVEL4;
+	IVAR5 = EXCEP_IRQ_LEVEL5;
+	IVAR6 = EXCEP_IRQ_LEVEL6;
+
+	mn10300_dcache_flush_inv();
+	mn10300_icache_inv();
+
+	/* disable all interrupts and set to priority 6 (lowest) */
+	for (loop = 0; loop < NR_IRQS; loop++)
+		GxICR(loop) = GxICR_LEVEL_6 | GxICR_DETECT;
+
+	/* clear the timers */
+	TM0MD	= 0;
+	TM1MD	= 0;
+	TM2MD	= 0;
+	TM3MD	= 0;
+	TM4MD	= 0;
+	TM5MD	= 0;
+	TM6MD	= 0;
+	TM6MDA	= 0;
+	TM6MDB	= 0;
+	TM7MD	= 0;
+	TM8MD	= 0;
+	TM9MD	= 0;
+	TM10MD	= 0;
+	TM11MD	= 0;
+
+	calibrate_clock();
+}
diff --git a/arch/mn10300/unit-asb2303/Makefile b/arch/mn10300/unit-asb2303/Makefile
new file mode 100644
index 000000000000..03e579fa99d0
--- /dev/null
+++ b/arch/mn10300/unit-asb2303/Makefile
@@ -0,0 +1,6 @@
+###############################################################################
+#
+# Makefile for the ASB2303 board
+#
+###############################################################################
+obj-y   := unit-init.o smc91111.o leds.o
diff --git a/arch/mn10300/unit-asb2303/leds.c b/arch/mn10300/unit-asb2303/leds.c
new file mode 100644
index 000000000000..cd4bc78ccfc8
--- /dev/null
+++ b/arch/mn10300/unit-asb2303/leds.c
@@ -0,0 +1,52 @@
+/* ASB2303 peripheral 7-segment LEDs x1 support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/intctl-regs.h>
+#include <asm/rtc-regs.h>
+#include <asm/unit/leds.h>
+
+#if 0
+static const u8 asb2303_led_hex_tbl[16] = {
+	0x80, 0xf2, 0x48, 0x60, 0x32, 0x24, 0x04, 0xf0,
+	0x00, 0x20, 0x10, 0x06, 0x8c, 0x42, 0x0c, 0x1c
+};
+#endif
+
+static const u8 asb2303_led_chase_tbl[6] = {
+	~0x02,	/* top		- segA */
+	~0x04,	/* right top	- segB */
+	~0x08,	/* right bottom	- segC */
+	~0x10,	/* bottom	- segD */
+	~0x20,	/* left bottom	- segE */
+	~0x40,	/* left top	- segF */
+};
+
+static unsigned asb2303_led_chase;
+
+void peripheral_leds_display_exception(enum exception_code code)
+{
+	ASB2303_GPIO0DEF = 0x5555;	/* configure as an output port */
+	ASB2303_7SEGLEDS = 0x6d;	/* triple horizontal bar */
+}
+
+void peripheral_leds_led_chase(void)
+{
+	ASB2303_GPIO0DEF = 0x5555;	/* configure as an output port */
+	ASB2303_7SEGLEDS = asb2303_led_chase_tbl[asb2303_led_chase];
+	asb2303_led_chase++;
+	if (asb2303_led_chase >= 6)
+		asb2303_led_chase = 0;
+}
diff --git a/arch/mn10300/unit-asb2303/smc91111.c b/arch/mn10300/unit-asb2303/smc91111.c
new file mode 100644
index 000000000000..30875dd65631
--- /dev/null
+++ b/arch/mn10300/unit-asb2303/smc91111.c
@@ -0,0 +1,52 @@
+/* ASB2303 initialisation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/timex.h>
+#include <asm/processor.h>
+#include <asm/intctl-regs.h>
+#include <asm/unit/smc91111.h>
+
+static struct resource smc91c111_resources[] = {
+	[0] = {
+		.start		= SMC91111_BASE,
+		.end		= SMC91111_BASE_END,
+		.flags		= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start		= SMC91111_IRQ,
+		.end		= SMC91111_IRQ,
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device smc91c111_device = {
+	.name		= "smc91x",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(smc91c111_resources),
+	.resource	= smc91c111_resources,
+};
+
+/*
+ * add platform devices
+ */
+static int __init unit_device_init(void)
+{
+	platform_device_register(&smc91c111_device);
+	return 0;
+}
+
+device_initcall(unit_device_init);
diff --git a/arch/mn10300/unit-asb2303/unit-init.c b/arch/mn10300/unit-asb2303/unit-init.c
new file mode 100644
index 000000000000..14b2c817cff8
--- /dev/null
+++ b/arch/mn10300/unit-asb2303/unit-init.c
@@ -0,0 +1,60 @@
+/* ASB2303 initialisation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/device.h>
+
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/intctl-regs.h>
+
+/*
+ * initialise some of the unit hardware before gdbstub is set up
+ */
+asmlinkage void __init unit_init(void)
+{
+	/* set up the external interrupts */
+	SET_XIRQ_TRIGGER(0, XIRQ_TRIGGER_HILEVEL);
+	SET_XIRQ_TRIGGER(2, XIRQ_TRIGGER_LOWLEVEL);
+	SET_XIRQ_TRIGGER(3, XIRQ_TRIGGER_HILEVEL);
+	SET_XIRQ_TRIGGER(4, XIRQ_TRIGGER_LOWLEVEL);
+	SET_XIRQ_TRIGGER(5, XIRQ_TRIGGER_LOWLEVEL);
+}
+
+/*
+ * initialise the rest of the unit hardware after gdbstub is ready
+ */
+void __init unit_setup(void)
+{
+}
+
+/*
+ * initialise the external interrupts used by a unit of this type
+ */
+void __init unit_init_IRQ(void)
+{
+	unsigned int extnum;
+
+	for (extnum = 0; extnum < NR_XIRQS; extnum++) {
+		switch (GET_XIRQ_TRIGGER(extnum)) {
+		case XIRQ_TRIGGER_HILEVEL:
+		case XIRQ_TRIGGER_LOWLEVEL:
+			set_irq_handler(XIRQ2IRQ(extnum), handle_level_irq);
+			break;
+		default:
+			break;
+		}
+	}
+}
diff --git a/arch/mn10300/unit-asb2305/Makefile b/arch/mn10300/unit-asb2305/Makefile
new file mode 100644
index 000000000000..0551022225b3
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/Makefile
@@ -0,0 +1,8 @@
+###############################################################################
+#
+# Makefile for the ASB2305 board
+#
+###############################################################################
+obj-y   := unit-init.o leds.o
+
+obj-$(CONFIG_PCI) += pci.o pci-asb2305.o pci-irq.o pci-iomap.o
diff --git a/arch/mn10300/unit-asb2305/leds.c b/arch/mn10300/unit-asb2305/leds.c
new file mode 100644
index 000000000000..e99dcc9cee1a
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/leds.c
@@ -0,0 +1,124 @@
+/* ASB2305 Peripheral 7-segment LEDs x4 support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/cpu/intctl-regs.h>
+#include <asm/cpu/rtc-regs.h>
+#include <asm/unit/leds.h>
+
+static const u8 asb2305_led_hex_tbl[16] = {
+	0x80, 0xf2, 0x48, 0x60, 0x32, 0x24, 0x04, 0xf0,
+	0x00, 0x20, 0x10, 0x06, 0x8c, 0x42, 0x0c, 0x1c
+};
+
+static const u32 asb2305_led_chase_tbl[6] = {
+	~0x02020202,	/* top		- segA */
+	~0x04040404,	/* right top	- segB */
+	~0x08080808,	/* right bottom	- segC */
+	~0x10101010,	/* bottom	- segD */
+	~0x20202020,	/* left bottom	- segE */
+	~0x40404040,	/* left top	- segF */
+};
+
+static unsigned asb2305_led_chase;
+
+void peripheral_leds7x4_display_dec(unsigned int val, unsigned int points)
+{
+	u32 leds;
+
+	leds = asb2305_led_hex_tbl[(val/1000) % 10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[(val/100) % 10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[(val/10) % 10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[val % 10];
+	leds |= points^0x01010101;
+
+	ASB2305_7SEGLEDS = leds;
+}
+
+void peripheral_leds7x4_display_hex(unsigned int val, unsigned int points)
+{
+	u32 leds;
+
+	leds = asb2305_led_hex_tbl[(val/1000) % 10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[(val/100) % 10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[(val/10) % 10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[val % 10];
+	leds |= points^0x01010101;
+
+	ASB2305_7SEGLEDS = leds;
+}
+
+void peripheral_leds_display_exception(enum exception_code code)
+{
+	u32 leds;
+
+	leds = asb2305_led_hex_tbl[(code/0x100) % 0x10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[(code/0x10) % 0x10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[code % 0x10];
+	leds |= 0x6d010101;
+
+	ASB2305_7SEGLEDS = leds;
+}
+
+void peripheral_leds7x4_display_minssecs(unsigned int time, unsigned int points)
+{
+	u32 leds;
+
+	leds = asb2305_led_hex_tbl[(time/600) % 6];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[(time/60) % 10];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[(time/10) % 6];
+	leds <<= 8;
+	leds |= asb2305_led_hex_tbl[time % 10];
+	leds |= points^0x01010101;
+
+	ASB2305_7SEGLEDS = leds;
+}
+
+void peripheral_leds7x4_display_rtc(void)
+{
+	unsigned int clock;
+	u8 mins, secs;
+
+	mins = RTMCR;
+	secs = RTSCR;
+
+	clock = ((mins & 0xf0) >> 4);
+	clock *= 10;
+	clock += (mins & 0x0f);
+	clock *= 6;
+
+	clock += ((secs & 0xf0) >> 4);
+	clock *= 10;
+	clock += (secs & 0x0f);
+
+	peripheral_leds7x4_display_minssecs(clock, 0);
+}
+
+void peripheral_leds_led_chase(void)
+{
+	ASB2305_7SEGLEDS = asb2305_led_chase_tbl[asb2305_led_chase];
+	asb2305_led_chase++;
+	if (asb2305_led_chase >= 6)
+		asb2305_led_chase = 0;
+}
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.c b/arch/mn10300/unit-asb2305/pci-asb2305.c
new file mode 100644
index 000000000000..d100ca788468
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@@ -0,0 +1,303 @@
+/* ASB2305 PCI resource stuff
+ *
+ * Copyright (C) 2001 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from arch/i386/pci-i386.c
+ *   - Copyright 1997--2000 Martin Mares <mj@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+#include "pci-asb2305.h"
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+			    resource_size_t size, resource_size_t align)
+{
+#if 0
+	struct pci_dev *dev = data;
+
+	printk(KERN_DEBUG
+	       "### PCIBIOS_ALIGN_RESOURCE(%s,,{%08lx-%08lx,%08lx},%lx)\n",
+	       pci_name(dev),
+	       res->start,
+	       res->end,
+	       res->flags,
+	       size
+	       );
+#endif
+
+	if (res->flags & IORESOURCE_IO) {
+		unsigned long start = res->start;
+
+		if (start & 0x300) {
+			start = (start + 0x3ff) & ~0x3ff;
+			res->start = start;
+		}
+	}
+}
+
+
+/*
+ *  Handle resources of PCI devices.  If the world were perfect, we could
+ *  just allocate all the resource regions and do nothing more.  It isn't.
+ *  On the other hand, we cannot just re-allocate all devices, as it would
+ *  require us to know lots of host bridge internals.  So we attempt to
+ *  keep as much of the original configuration as possible, but tweak it
+ *  when it's found to be wrong.
+ *
+ *  Known BIOS problems we have to work around:
+ *	- I/O or memory regions not configured
+ *	- regions configured, but not enabled in the command register
+ *	- bogus I/O addresses above 64K used
+ *	- expansion ROMs left enabled (this may sound harmless, but given
+ *	  the fact the PCI specs explicitly allow address decoders to be
+ *	  shared between expansion ROMs and other resource regions, it's
+ *	  at least dangerous)
+ *
+ *  Our solution:
+ *	(1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ *	    This gives us fixed barriers on where we can allocate.
+ *	(2) Allocate resources for all enabled devices.  If there is
+ *	    a collision, just mark the resource as unallocated. Also
+ *	    disable expansion ROMs during this step.
+ *	(3) Try to allocate resources for disabled devices.  If the
+ *	    resources were assigned correctly, everything goes well,
+ *	    if they weren't, they won't disturb allocation of other
+ *	    resources.
+ *	(4) Assign new addresses to resources which were either
+ *	    not configured at all or misconfigured.  If explicitly
+ *	    requested by the user, configure expansion ROM address
+ *	    as well.
+ */
+static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+{
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+	int idx;
+	struct resource *r, *pr;
+
+	/* Depth-First Search on bus tree */
+	list_for_each_entry(bus, bus_list, node) {
+		dev = bus->self;
+		if (dev) {
+			for (idx = PCI_BRIDGE_RESOURCES;
+			     idx < PCI_NUM_RESOURCES;
+			     idx++) {
+				r = &dev->resource[idx];
+				if (!r->flags)
+					continue;
+				pr = pci_find_parent_resource(dev, r);
+				if (!r->start ||
+				    !pr ||
+				    request_resource(pr, r) < 0) {
+					printk(KERN_ERR "PCI:"
+					       " Cannot allocate resource"
+					       " region %d of bridge %s\n",
+					       idx, pci_name(dev));
+					/* Something is wrong with the region.
+					 * Invalidate the resource to prevent
+					 * child resource allocations in this
+					 * range. */
+					r->flags = 0;
+				}
+			}
+		}
+		pcibios_allocate_bus_resources(&bus->children);
+	}
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+	struct pci_dev *dev = NULL;
+	int idx, disabled;
+	u16 command;
+	struct resource *r, *pr;
+
+	for_each_pci_dev(dev) {
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		for (idx = 0; idx < 6; idx++) {
+			r = &dev->resource[idx];
+			if (r->parent)		/* Already allocated */
+				continue;
+			if (!r->start)		/* Address not assigned */
+				continue;
+			if (r->flags & IORESOURCE_IO)
+				disabled = !(command & PCI_COMMAND_IO);
+			else
+				disabled = !(command & PCI_COMMAND_MEMORY);
+			if (pass == disabled) {
+				DBG("PCI[%s]: Resource %08lx-%08lx"
+				    " (f=%lx, d=%d, p=%d)\n",
+				    pci_name(dev), r->start, r->end, r->flags,
+				    disabled, pass);
+				pr = pci_find_parent_resource(dev, r);
+				if (!pr || request_resource(pr, r) < 0) {
+					printk(KERN_ERR "PCI:"
+					       " Cannot allocate resource"
+					       " region %d of device %s\n",
+					       idx, pci_name(dev));
+					/* We'll assign a new address later */
+					r->end -= r->start;
+					r->start = 0;
+				}
+			}
+		}
+		if (!pass) {
+			r = &dev->resource[PCI_ROM_RESOURCE];
+			if (r->flags & IORESOURCE_ROM_ENABLE) {
+				/* Turn the ROM off, leave the resource region,
+				 * but keep it unregistered. */
+				u32 reg;
+				DBG("PCI: Switching off ROM of %s\n",
+				    pci_name(dev));
+				r->flags &= ~IORESOURCE_ROM_ENABLE;
+				pci_read_config_dword(
+					dev, dev->rom_base_reg, &reg);
+				pci_write_config_dword(
+					dev, dev->rom_base_reg,
+					reg & ~PCI_ROM_ADDRESS_ENABLE);
+			}
+		}
+	}
+}
+
+static int __init pcibios_assign_resources(void)
+{
+	struct pci_dev *dev = NULL;
+	struct resource *r, *pr;
+
+	if (!(pci_probe & PCI_ASSIGN_ROMS)) {
+		/* Try to use BIOS settings for ROMs, otherwise let
+		   pci_assign_unassigned_resources() allocate the new
+		   addresses. */
+		for_each_pci_dev(dev) {
+			r = &dev->resource[PCI_ROM_RESOURCE];
+			if (!r->flags || !r->start)
+				continue;
+			pr = pci_find_parent_resource(dev, r);
+			if (!pr || request_resource(pr, r) < 0) {
+				r->end -= r->start;
+				r->start = 0;
+			}
+		}
+	}
+
+	pci_assign_unassigned_resources();
+
+	return 0;
+}
+
+fs_initcall(pcibios_assign_resources);
+
+void __init pcibios_resource_survey(void)
+{
+	DBG("PCI: Allocating resources\n");
+	pcibios_allocate_bus_resources(&pci_root_buses);
+	pcibios_allocate_resources(0);
+	pcibios_allocate_resources(1);
+}
+
+int pcibios_enable_resources(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+
+	for (idx = 0; idx < 6; idx++) {
+		/* Only set up the requested stuff */
+		if (!(mask & (1 << idx)))
+			continue;
+
+		r = &dev->resource[idx];
+
+		if (!r->start && r->end) {
+			printk(KERN_ERR
+			       "PCI: Device %s not available because of"
+			       " resource collisions\n",
+			       pci_name(dev));
+			return -EINVAL;
+		}
+
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (dev->resource[PCI_ROM_RESOURCE].start)
+		cmd |= PCI_COMMAND_MEMORY;
+
+	if (cmd != old_cmd)
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+	return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check the latency
+ *  timer as certain crappy BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	unsigned long prot;
+
+	/* Leave vm_pgoff as-is, the PCI space address is the physical
+	 * address on this platform.
+	 */
+	vma->vm_flags |= VM_LOCKED | VM_IO;
+
+	prot = pgprot_val(vma->vm_page_prot);
+	prot &= ~_PAGE_CACHE;
+	vma->vm_page_prot = __pgprot(prot);
+
+	/* Write-combine setting is ignored */
+	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.h b/arch/mn10300/unit-asb2305/pci-asb2305.h
new file mode 100644
index 000000000000..84634fa3bce6
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.h
@@ -0,0 +1,82 @@
+/* ASB2305 Arch-specific PCI declarations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Derived from: arch/i386/kernel/pci-i386.h: (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _PCI_ASB2305_H
+#define _PCI_ASB2305_H
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS 1
+#define PCI_PROBE_CONF1 2
+#define PCI_PROBE_CONF2 4
+#define PCI_NO_SORT 0x100
+#define PCI_BIOS_SORT 0x200
+#define PCI_NO_CHECKS 0x400
+#define PCI_ASSIGN_ROMS 0x1000
+#define PCI_BIOS_IRQ_SCAN 0x2000
+
+extern unsigned int pci_probe;
+
+/* pci-asb2305.c */
+
+extern unsigned int pcibios_max_latency;
+
+extern void pcibios_resource_survey(void);
+extern int pcibios_enable_resources(struct pci_dev *dev, int mask);
+
+/* pci.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops *pci_root_ops;
+
+extern struct irq_routing_table *pcibios_get_irq_routing_table(void);
+extern int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* pci-irq.c */
+
+struct irq_info {
+	u8 bus, devfn;			/* Bus, device and function */
+	struct {
+		u8 link;		/* IRQ line ID, chipset dependent,
+					 * 0=not routed */
+		u16 bitmap;		/* Available IRQs */
+	} __attribute__((packed)) irq[4];
+	u8 slot;			/* Slot number, 0=onboard */
+	u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+	u32 signature;			/* PIRQ_SIGNATURE should be here */
+	u16 version;			/* PIRQ_VERSION */
+	u16 size;			/* Table size in bytes */
+	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
+	u16 exclusive_irqs;		/* IRQs devoted exclusively to PCI usage */
+	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of interrupt router */
+	u32 miniport_data;		/* Crap */
+	u8 rfu[11];
+	u8 checksum;			/* Modulo 256 checksum must give zero */
+	struct irq_info slots[0];
+} __attribute__((packed));
+
+extern unsigned int pcibios_irq_mask;
+
+extern void pcibios_irq_init(void);
+extern void pcibios_fixup_irqs(void);
+extern void pcibios_enable_irq(struct pci_dev *dev);
+
+#endif /* PCI_ASB2305_H */
diff --git a/arch/mn10300/unit-asb2305/pci-iomap.c b/arch/mn10300/unit-asb2305/pci-iomap.c
new file mode 100644
index 000000000000..dbceae4307da
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/pci-iomap.c
@@ -0,0 +1,31 @@
+/* ASB2305 PCI I/O mapping handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+
+/*
+ * Create a virtual mapping cookie for a PCI BAR (memory or IO)
+ */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	unsigned long start = pci_resource_start(dev, bar);
+	unsigned long len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (!len || !start)
+		return NULL;
+
+	if ((flags & IORESOURCE_IO) || (flags & IORESOURCE_MEM))
+		return (void __iomem *) start;
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_iomap);
diff --git a/arch/mn10300/unit-asb2305/pci-irq.c b/arch/mn10300/unit-asb2305/pci-irq.c
new file mode 100644
index 000000000000..58cfb44f0acf
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/pci-irq.c
@@ -0,0 +1,51 @@
+/* PCI IRQ routing on the MN103E010 based ASB2305
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ *
+ * This is simple: All PCI interrupts route through the CPU's XIRQ1 pin [IRQ 35]
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include "pci-asb2305.h"
+
+void __init pcibios_irq_init(void)
+{
+}
+
+void __init pcibios_fixup_irqs(void)
+{
+	struct pci_dev *dev = NULL;
+	u8 line, pin;
+
+	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+		if (pin) {
+			dev->irq = XIRQ1;
+			pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
+					      dev->irq);
+		}
+		pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line);
+	}
+}
+
+void __init pcibios_penalize_isa_irq(int irq)
+{
+}
+
+void pcibios_enable_irq(struct pci_dev *dev)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+}
diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c
new file mode 100644
index 000000000000..1a86425fec42
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/pci.c
@@ -0,0 +1,545 @@
+/* ASB2305 PCI support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Derived from arch/i386/kernel/pci-pc.c
+ *	(c) 1999--2000 Martin Mares <mj@suse.cz>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include "pci-asb2305.h"
+
+unsigned int pci_probe = 1;
+
+int pcibios_last_bus = -1;
+struct pci_bus *pci_root_bus;
+struct pci_ops *pci_root_ops;
+
+/*
+ * Functions for accessing PCI configuration space
+ */
+
+#define CONFIG_CMD(bus, devfn, where) \
+	(0x80000000 | (bus->number << 16) | (devfn << 8) | (where & ~3))
+
+#define MEM_PAGING_REG	(*(volatile __u32 *) 0xBFFFFFF4)
+#define CONFIG_ADDRESS	(*(volatile __u32 *) 0xBFFFFFF8)
+#define CONFIG_DATAL(X)	(*(volatile __u32 *) 0xBFFFFFFC)
+#define CONFIG_DATAW(X)	(*(volatile __u16 *) (0xBFFFFFFC + ((X) & 2)))
+#define CONFIG_DATAB(X)	(*(volatile __u8  *) (0xBFFFFFFC + ((X) & 3)))
+
+#define BRIDGEREGB(X)	(*(volatile __u8  *) (0xBE040000 + (X)))
+#define BRIDGEREGW(X)	(*(volatile __u16 *) (0xBE040000 + (X)))
+#define BRIDGEREGL(X)	(*(volatile __u32 *) (0xBE040000 + (X)))
+
+static inline int __query(const struct pci_bus *bus, unsigned int devfn)
+{
+#if 0
+	return bus->number == 0 && (devfn == PCI_DEVFN(0, 0));
+	return bus->number == 1;
+	return bus->number == 0 &&
+		(devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(3, 0));
+#endif
+	return 1;
+}
+
+/*
+ * translate Linuxcentric addresses to PCI bus addresses
+ */
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			     struct resource *res)
+{
+	if (res->flags & IORESOURCE_IO) {
+		region->start = (res->start & 0x00ffffff);
+		region->end   = (res->end   & 0x00ffffff);
+	}
+
+	if (res->flags & IORESOURCE_MEM) {
+		region->start = (res->start & 0x03ffffff) | MEM_PAGING_REG;
+		region->end   = (res->end   & 0x03ffffff) | MEM_PAGING_REG;
+	}
+
+#if 0
+	printk(KERN_DEBUG "RES->BUS: %lx-%lx => %lx-%lx\n",
+	       res->start, res->end, region->start, region->end);
+#endif
+}
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+
+/*
+ * translate PCI bus addresses to Linuxcentric addresses
+ */
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			     struct pci_bus_region *region)
+{
+	if (res->flags & IORESOURCE_IO) {
+		res->start = (region->start & 0x00ffffff) | 0xbe000000;
+		res->end   = (region->end   & 0x00ffffff) | 0xbe000000;
+	}
+
+	if (res->flags & IORESOURCE_MEM) {
+		res->start = (region->start & 0x03ffffff) | 0xb8000000;
+		res->end   = (region->end   & 0x03ffffff) | 0xb8000000;
+	}
+
+#if 0
+	printk(KERN_INFO "BUS->RES: %lx-%lx => %lx-%lx\n",
+	       region->start, region->end, res->start, res->end);
+#endif
+}
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+
+/*
+ *
+ */
+static int pci_ampci_read_config_byte(struct pci_bus *bus, unsigned int devfn,
+				      int where, u32 *_value)
+{
+	u32 rawval, value;
+
+	if (bus->number == 0 && devfn == PCI_DEVFN(0, 0)) {
+		value = BRIDGEREGB(where);
+		__pcbdebug("=> %02hx", &BRIDGEREGL(where), value);
+	} else {
+		CONFIG_ADDRESS = CONFIG_CMD(bus, devfn, where);
+		rawval = CONFIG_ADDRESS;
+		value = CONFIG_DATAB(where);
+		if (__query(bus, devfn))
+			__pcidebug("=> %02hx", bus, devfn, where, value);
+	}
+
+	*_value = value;
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_ampci_read_config_word(struct pci_bus *bus, unsigned int devfn,
+				      int where, u32 *_value)
+{
+	u32 rawval, value;
+
+	if (bus->number == 0 && devfn == PCI_DEVFN(0, 0)) {
+		value = BRIDGEREGW(where);
+		__pcbdebug("=> %04hx", &BRIDGEREGL(where), value);
+	} else {
+		CONFIG_ADDRESS = CONFIG_CMD(bus, devfn, where);
+		rawval = CONFIG_ADDRESS;
+		value = CONFIG_DATAW(where);
+		if (__query(bus, devfn))
+			__pcidebug("=> %04hx", bus, devfn, where, value);
+	}
+
+	*_value = value;
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_ampci_read_config_dword(struct pci_bus *bus, unsigned int devfn,
+				       int where, u32 *_value)
+{
+	u32 rawval, value;
+
+	if (bus->number == 0 && devfn == PCI_DEVFN(0, 0)) {
+		value = BRIDGEREGL(where);
+		__pcbdebug("=> %08x", &BRIDGEREGL(where), value);
+	} else {
+		CONFIG_ADDRESS = CONFIG_CMD(bus, devfn, where);
+		rawval = CONFIG_ADDRESS;
+		value = CONFIG_DATAL(where);
+		if (__query(bus, devfn))
+			__pcidebug("=> %08x", bus, devfn, where, value);
+	}
+
+	*_value = value;
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_ampci_write_config_byte(struct pci_bus *bus, unsigned int devfn,
+				       int where, u8 value)
+{
+	u32 rawval;
+
+	if (bus->number == 0 && devfn == PCI_DEVFN(0, 0)) {
+		__pcbdebug("<= %02x", &BRIDGEREGB(where), value);
+		BRIDGEREGB(where) = value;
+	} else {
+		if (bus->number == 0 &&
+		    (devfn == PCI_DEVFN(2, 0) && devfn == PCI_DEVFN(3, 0))
+		    )
+			__pcidebug("<= %02x", bus, devfn, where, value);
+		CONFIG_ADDRESS = CONFIG_CMD(bus, devfn, where);
+		rawval = CONFIG_ADDRESS;
+		CONFIG_DATAB(where) = value;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_ampci_write_config_word(struct pci_bus *bus, unsigned int devfn,
+				       int where, u16 value)
+{
+	u32 rawval;
+
+	if (bus->number == 0 && devfn == PCI_DEVFN(0, 0)) {
+		__pcbdebug("<= %04hx", &BRIDGEREGW(where), value);
+		BRIDGEREGW(where) = value;
+	} else {
+		if (__query(bus, devfn))
+			__pcidebug("<= %04hx", bus, devfn, where, value);
+		CONFIG_ADDRESS = CONFIG_CMD(bus, devfn, where);
+		rawval = CONFIG_ADDRESS;
+		CONFIG_DATAW(where) = value;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_ampci_write_config_dword(struct pci_bus *bus, unsigned int devfn,
+					int where, u32 value)
+{
+	u32 rawval;
+
+	if (bus->number == 0 && devfn == PCI_DEVFN(0, 0)) {
+		__pcbdebug("<= %08x", &BRIDGEREGL(where), value);
+		BRIDGEREGL(where) = value;
+	} else {
+		if (__query(bus, devfn))
+			__pcidebug("<= %08x", bus, devfn, where, value);
+		CONFIG_ADDRESS = CONFIG_CMD(bus, devfn, where);
+		rawval = CONFIG_ADDRESS;
+		CONFIG_DATAL(where) = value;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_ampci_read_config(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 *val)
+{
+	switch (size) {
+	case 1:
+		return pci_ampci_read_config_byte(bus, devfn, where, val);
+	case 2:
+		return pci_ampci_read_config_word(bus, devfn, where, val);
+	case 4:
+		return pci_ampci_read_config_dword(bus, devfn, where, val);
+	default:
+		BUG();
+		return -EOPNOTSUPP;
+	}
+}
+
+static int pci_ampci_write_config(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 val)
+{
+	switch (size) {
+	case 1:
+		return pci_ampci_write_config_byte(bus, devfn, where, val);
+	case 2:
+		return pci_ampci_write_config_word(bus, devfn, where, val);
+	case 4:
+		return pci_ampci_write_config_dword(bus, devfn, where, val);
+	default:
+		BUG();
+		return -EOPNOTSUPP;
+	}
+}
+
+static struct pci_ops pci_direct_ampci = {
+	pci_ampci_read_config,
+	pci_ampci_write_config,
+};
+
+/*
+ * Before we decide to use direct hardware access mechanisms, we try to do some
+ * trivial checks to ensure it at least _seems_ to be working -- we just test
+ * whether bus 00 contains a host bridge (this is similar to checking
+ * techniques used in XFree86, but ours should be more reliable since we
+ * attempt to make use of direct access hints provided by the PCI BIOS).
+ *
+ * This should be close to trivial, but it isn't, because there are buggy
+ * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
+ */
+static int __init pci_sanity_check(struct pci_ops *o)
+{
+	struct pci_bus bus;		/* Fake bus and device */
+	u32 x;
+
+	bus.number = 0;
+
+	if ((!o->read(&bus, 0, PCI_CLASS_DEVICE, 2, &x) &&
+	     (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
+	    (!o->read(&bus, 0, PCI_VENDOR_ID, 2, &x) &&
+	     (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
+		return 1;
+
+	printk(KERN_ERROR "PCI: Sanity check failed\n");
+	return 0;
+}
+
+static int __init pci_check_direct(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	/*
+	 * Check if access works.
+	 */
+	if (pci_sanity_check(&pci_direct_ampci)) {
+		local_irq_restore(flags);
+		printk(KERN_INFO "PCI: Using configuration ampci\n");
+		request_mem_region(0xBE040000, 256, "AMPCI bridge");
+		request_mem_region(0xBFFFFFF4, 12, "PCI ampci");
+		return 0;
+	}
+
+	local_irq_restore(flags);
+	return -ENODEV;
+}
+
+static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
+{
+	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
+	struct resource *devr = &dev->resource[idx];
+
+	if (dev->bus) {
+		for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
+			struct resource *busr = dev->bus->resource[i];
+
+			if (!busr || (busr->flags ^ devr->flags) & type_mask)
+				continue;
+
+			if (devr->start &&
+			    devr->start >= busr->start &&
+			    devr->end <= busr->end)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+	struct pci_bus_region region;
+	int i;
+	int limit;
+
+	if (dev->bus->number != 0)
+		return;
+
+	limit = (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) ?
+		PCI_BRIDGE_RESOURCES : PCI_NUM_RESOURCES;
+
+	for (i = 0; i < limit; i++) {
+		if (!dev->resource[i].flags)
+			continue;
+
+		region.start = dev->resource[i].start;
+		region.end = dev->resource[i].end;
+		pcibios_bus_to_resource(dev, &dev->resource[i], &region);
+		if (is_valid_resource(dev, i))
+			pci_claim_resource(dev, i);
+	}
+}
+
+/*
+ *  Called after each bus is probed, but before its children
+ *  are examined.
+ */
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	if (bus->self) {
+		pci_read_bridge_bases(bus);
+		pcibios_fixup_device_resources(bus->self);
+	}
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		pcibios_fixup_device_resources(dev);
+}
+
+/*
+ * Initialization. Try all known PCI access methods. Note that we support
+ * using both PCI BIOS and direct access: in such cases, we use I/O ports
+ * to access config space, but we still keep BIOS order of cards to be
+ * compatible with 2.0.X. This should go away some day.
+ */
+static int __init pcibios_init(void)
+{
+	ioport_resource.start	= 0xA0000000;
+	ioport_resource.end	= 0xDFFFFFFF;
+	iomem_resource.start	= 0xA0000000;
+	iomem_resource.end	= 0xDFFFFFFF;
+
+	if (!pci_probe)
+		return 0;
+
+	if (pci_check_direct() < 0) {
+		printk(KERN_WARNING "PCI: No PCI bus detected\n");
+		return 0;
+	}
+
+	printk(KERN_INFO "PCI: Probing PCI hardware [mempage %08x]\n",
+	       MEM_PAGING_REG);
+
+	{
+#if 0
+		static struct pci_bus am33_root_bus = {
+			.children  = LIST_HEAD_INIT(am33_root_bus.children),
+			.devices   = LIST_HEAD_INIT(am33_root_bus.devices),
+			.number    = 0,
+			.secondary = 0,
+			.resource = { &ioport_resource, &iomem_resource },
+		};
+
+		am33_root_bus.ops = pci_root_ops;
+		list_add_tail(&am33_root_bus.node, &pci_root_buses);
+
+		am33_root_bus.subordinate = pci_do_scan_bus(0);
+
+		pci_root_bus = &am33_root_bus;
+#else
+		pci_root_bus = pci_scan_bus(0, &pci_direct_ampci, NULL);
+#endif
+	}
+
+	pcibios_irq_init();
+	pcibios_fixup_irqs();
+#if 0
+	pcibios_resource_survey();
+#endif
+	return 0;
+}
+
+arch_initcall(pcibios_init);
+
+char *__init pcibios_setup(char *str)
+{
+	if (!strcmp(str, "off")) {
+		pci_probe = 0;
+		return NULL;
+
+	} else if (!strncmp(str, "lastbus=", 8)) {
+		pcibios_last_bus = simple_strtol(str+8, NULL, 0);
+		return NULL;
+	}
+
+	return str;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	int err;
+
+	err = pcibios_enable_resources(dev, mask);
+	if (err == 0)
+		pcibios_enable_irq(dev);
+	return err;
+}
+
+/*
+ * disable the ethernet chipset
+ */
+static void __init unit_disable_pcnet(struct pci_bus *bus, struct pci_ops *o)
+{
+	u32 x;
+
+	bus->number = 0;
+
+	o->read (bus, PCI_DEVFN(2, 0), PCI_COMMAND,		2, &x);
+	x |= PCI_COMMAND_MASTER |
+		PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+		PCI_COMMAND_SERR | PCI_COMMAND_PARITY;
+	o->write(bus, PCI_DEVFN(2, 0), PCI_COMMAND,		2, x);
+	o->read (bus, PCI_DEVFN(2, 0), PCI_COMMAND,		2, &x);
+	o->write(bus, PCI_DEVFN(2, 0), PCI_BASE_ADDRESS_0,	4, 0x00030001);
+	o->read (bus, PCI_DEVFN(2, 0), PCI_BASE_ADDRESS_0,	4, &x);
+
+#define RDP (*(volatile u32 *) 0xBE030010)
+#define RAP (*(volatile u32 *) 0xBE030014)
+#define __set_RAP(X) do { RAP = (X); x = RAP; } while (0)
+#define __set_RDP(X) do { RDP = (X); x = RDP; } while (0)
+#define __get_RDP() ({ RDP & 0xffff; })
+
+	__set_RAP(0);
+	__set_RDP(0x0004);	/* CSR0 = STOP */
+
+	__set_RAP(88);		/* check CSR88 indicates an Am79C973 */
+	BUG_ON(__get_RDP() != 0x5003);
+
+	for (x = 0; x < 100; x++)
+		asm volatile("nop");
+
+	__set_RDP(0x0004);	/* CSR0 = STOP */
+}
+
+/*
+ * initialise the unit hardware
+ */
+asmlinkage void __init unit_pci_init(void)
+{
+	struct pci_bus bus;		/* Fake bus and device */
+	struct pci_ops *o = &pci_direct_ampci;
+	u32 x;
+
+	set_intr_level(XIRQ1, GxICR_LEVEL_3);
+
+	memset(&bus, 0, sizeof(bus));
+
+	MEM_PAGING_REG = 0xE8000000;
+
+	/* we need to set up the bridge _now_ or we won't be able to access the
+	 * PCI config registers
+	 */
+	BRIDGEREGW(PCI_COMMAND) |=
+		PCI_COMMAND_SERR | PCI_COMMAND_PARITY |
+		PCI_COMMAND_MEMORY | PCI_COMMAND_IO | PCI_COMMAND_MASTER;
+	BRIDGEREGW(PCI_STATUS)		= 0xF800;
+	BRIDGEREGB(PCI_LATENCY_TIMER)	= 0x10;
+	BRIDGEREGL(PCI_BASE_ADDRESS_0)	= 0x80000000;
+	BRIDGEREGB(PCI_INTERRUPT_LINE)	= 1;
+	BRIDGEREGL(0x48)		= 0x98000000;	/* AMPCI base addr */
+	BRIDGEREGB(0x41)		= 0x00;		/* secondary bus
+							 * number */
+	BRIDGEREGB(0x42)		= 0x01;		/* subordinate bus
+							 * number */
+	BRIDGEREGB(0x44)		= 0x01;
+	BRIDGEREGL(0x50)		= 0x00000001;
+	BRIDGEREGL(0x58)		= 0x00001002;
+	BRIDGEREGL(0x5C)		= 0x00000011;
+
+	/* we also need to set up the PCI-PCI bridge */
+	bus.number = 0;
+
+	/* IO: 0x00000000-0x00020000 */
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_COMMAND,		2, &x);
+	x |= PCI_COMMAND_MASTER |
+		PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+		PCI_COMMAND_SERR | PCI_COMMAND_PARITY;
+	o->write(&bus, PCI_DEVFN(3, 0), PCI_COMMAND,		2, x);
+
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_IO_BASE,		1, &x);
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_IO_BASE_UPPER16,	4, &x);
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_MEMORY_BASE,	4, &x);
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_PREF_MEMORY_BASE,	4, &x);
+
+	o->write(&bus, PCI_DEVFN(3, 0), PCI_IO_BASE,		1, 0x01);
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_IO_BASE,		1, &x);
+	o->write(&bus, PCI_DEVFN(3, 0), PCI_IO_BASE_UPPER16,	4, 0x00020000);
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_IO_BASE_UPPER16,	4, &x);
+	o->write(&bus, PCI_DEVFN(3, 0), PCI_MEMORY_BASE,	4, 0xEBB0EA00);
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_MEMORY_BASE,	4, &x);
+	o->write(&bus, PCI_DEVFN(3, 0), PCI_PREF_MEMORY_BASE,	4, 0xE9F0E800);
+	o->read (&bus, PCI_DEVFN(3, 0), PCI_PREF_MEMORY_BASE,	4, &x);
+
+	unit_disable_pcnet(&bus, o);
+}
diff --git a/arch/mn10300/unit-asb2305/unit-init.c b/arch/mn10300/unit-asb2305/unit-init.c
new file mode 100644
index 000000000000..6a352414a358
--- /dev/null
+++ b/arch/mn10300/unit-asb2305/unit-init.c
@@ -0,0 +1,61 @@
+/* ASB2305 Initialisation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/processor.h>
+#include <asm/cpu/intctl-regs.h>
+#include <asm/cpu/rtc-regs.h>
+#include <asm/cpu/serial-regs.h>
+#include <asm/unit/serial.h>
+
+/*
+ * initialise some of the unit hardware before gdbstub is set up
+ */
+asmlinkage void __init unit_init(void)
+{
+#ifndef CONFIG_GDBSTUB_ON_TTYSx
+	/* set the 16550 interrupt line to level 3 if not being used for GDB */
+	set_intr_level(XIRQ0, GxICR_LEVEL_3);
+#endif
+}
+
+/*
+ * initialise the rest of the unit hardware after gdbstub is ready
+ */
+void __init unit_setup(void)
+{
+#ifdef CONFIG_PCI
+	unit_pci_init();
+#endif
+}
+
+/*
+ * initialise the external interrupts used by a unit of this type
+ */
+void __init unit_init_IRQ(void)
+{
+	unsigned int extnum;
+
+	for (extnum = 0; extnum < NR_XIRQS; extnum++) {
+		switch (GET_XIRQ_TRIGGER(extnum)) {
+		case XIRQ_TRIGGER_HILEVEL:
+		case XIRQ_TRIGGER_LOWLEVEL:
+			set_irq_handler(XIRQ2IRQ(extnum), handle_level_irq);
+			break;
+		default:
+			break;
+		}
+	}
+}
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index f32e031dcb27..708c5ae13b24 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -1,6 +1,4 @@
 /*
- * $Id: analog.c,v 1.68 2002/01/22 20:18:32 vojtech Exp $
- *
  *  Copyright (c) 1996-2001 Vojtech Pavlik
  */
 
@@ -164,6 +162,10 @@ static unsigned int get_time_pit(void)
 #define GET_TIME(x)	do { x = get_cycles(); } while (0)
 #define DELTA(x,y)	((y)-(x))
 #define TIME_NAME	"PCC"
+#elif defined(CONFIG_MN10300)
+#define GET_TIME(x)	do { x = get_cycles(); } while (0)
+#define DELTA(x, y)	((x) - (y))
+#define TIME_NAME	"TSC"
 #else
 #define FAKE_TIME
 static unsigned long analog_faketime = 0;
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 9cc25fd80b60..50c2b60e1fee 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -879,7 +879,8 @@ config SMC91X
 	tristate "SMC 91C9x/91C1xxx support"
 	select CRC32
 	select MII
-	depends on ARM || REDWOOD_5 || REDWOOD_6 || M32R || SUPERH || SOC_AU1X00 || BLACKFIN
+	depends on ARM || REDWOOD_5 || REDWOOD_6 || M32R || SUPERH || \
+		SOC_AU1X00 || BLACKFIN || MN10300
 	help
 	  This is a driver for SMC's 91x series of Ethernet chipsets,
 	  including the SMC91C94 and the SMC91C111. Say Y if you want it
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
index 271c28dc9baa..51d4134b37b1 100644
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -450,8 +450,20 @@ static inline void LPD7_SMC_outsw (unsigned char* a, int r,
 #define SMC_outsl(a, r, p, l)	writesl((a) + (r), p, l)
 #define SMC_IRQ_FLAGS		(-1)	/* from resource */
 
+#elif defined(CONFIG_MN10300)
+
+/*
+ * MN10300/AM33 configuration
+ */
+
+#include <asm/unit/smc91111.h>
+
 #else
 
+/*
+ * Default configuration
+ */
+
 #define SMC_CAN_USE_8BIT	1
 #define SMC_CAN_USE_16BIT	1
 #define SMC_CAN_USE_32BIT	1
diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig
index d449b150930e..b7bcdcc5c724 100644
--- a/drivers/parport/Kconfig
+++ b/drivers/parport/Kconfig
@@ -35,7 +35,8 @@ if PARPORT
 
 config PARPORT_PC
 	tristate "PC-style hardware"
-	depends on (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV && (!M68K || ISA)
+	depends on (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV && \
+		(!M68K || ISA) && !MN10300
 	---help---
 	  You should say Y here if you have a PC-style parallel port. All
 	  IBM PC compatible computers and some Alphas have PC-style
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 9f04d17576d6..4d1ce2e7361e 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_PPC32) += setup-irq.o
 obj-$(CONFIG_PPC) += setup-bus.o
 obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o
 obj-$(CONFIG_X86_VISWS) += setup-irq.o
+obj-$(CONFIG_MN10300) += setup-bus.o
 
 #
 # ACPI Related PCI FW Functions
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 2b53d1f56281..06f87b04f207 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -6,7 +6,7 @@ menu "Console display driver support"
 
 config VGA_CONSOLE
 	bool "VGA text console" if EMBEDDED || !X86
-	depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !ARCH_VERSATILE && !SUPERH && !BLACKFIN && !AVR32
+	depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !ARCH_VERSATILE && !SUPERH && !BLACKFIN && !AVR32 && !MN10300
 	default y
 	help
 	  Saying Y here will allow you to use Linux in text mode through a
diff --git a/include/asm-mn10300/.gitignore b/include/asm-mn10300/.gitignore
new file mode 100644
index 000000000000..0f87ba790e26
--- /dev/null
+++ b/include/asm-mn10300/.gitignore
@@ -0,0 +1,2 @@
+proc
+unit
diff --git a/include/asm-mn10300/Kbuild b/include/asm-mn10300/Kbuild
new file mode 100644
index 000000000000..79384c537dc6
--- /dev/null
+++ b/include/asm-mn10300/Kbuild
@@ -0,0 +1,5 @@
+include include/asm-generic/Kbuild.asm
+
+unifdef-y += termios.h
+unifdef-y += ptrace.h
+unifdef-y += page.h
diff --git a/include/asm-mn10300/atomic.h b/include/asm-mn10300/atomic.h
new file mode 100644
index 000000000000..27c9690b9574
--- /dev/null
+++ b/include/asm-mn10300/atomic.h
@@ -0,0 +1,166 @@
+/* MN10300 Atomic counter operations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_ATOMIC_H
+#define _ASM_ATOMIC_H
+
+#ifdef CONFIG_SMP
+#error not SMP safe
+#endif
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct {
+	int	counter;
+} atomic_t;
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#ifdef __KERNEL__
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_read(v)	((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_set(v, i) (((v)->counter) = (i))
+
+#include <asm/system.h>
+
+/**
+ * atomic_add_return - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns the result
+ * Note that the guaranteed useful range of an atomic_t is only 24 bits.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	unsigned long flags;
+	int temp;
+
+	local_irq_save(flags);
+	temp = v->counter;
+	temp += i;
+	v->counter = temp;
+	local_irq_restore(flags);
+
+	return temp;
+}
+
+/**
+ * atomic_sub_return - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns the result
+ * Note that the guaranteed useful range of an atomic_t is only 24 bits.
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	unsigned long flags;
+	int temp;
+
+	local_irq_save(flags);
+	temp = v->counter;
+	temp -= i;
+	v->counter = temp;
+	local_irq_restore(flags);
+
+	return temp;
+}
+
+static inline int atomic_add_negative(int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	atomic_add_return(i, v);
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	atomic_sub_return(i, v);
+}
+
+static inline void atomic_inc(atomic_t *v)
+{
+	atomic_add_return(1, v);
+}
+
+static inline void atomic_dec(atomic_t *v)
+{
+	atomic_sub_return(1, v);
+}
+
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+
+#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
+#define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
+
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
+{
+	unsigned long flags;
+
+	mask = ~mask;
+	local_irq_save(flags);
+	*addr &= mask;
+	local_irq_restore(flags);
+}
+
+#define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
+#define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
+
+/* Atomic operations are already serializing on MN10300??? */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#include <asm-generic/atomic.h>
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_ATOMIC_H */
diff --git a/include/asm-mn10300/auxvec.h b/include/asm-mn10300/auxvec.h
new file mode 100644
index 000000000000..4fdb60b2ae39
--- /dev/null
+++ b/include/asm-mn10300/auxvec.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_AUXVEC_H
+#define _ASM_AUXVEC_H
+
+#endif
diff --git a/include/asm-mn10300/bitops.h b/include/asm-mn10300/bitops.h
new file mode 100644
index 000000000000..cc6d40c05cf3
--- /dev/null
+++ b/include/asm-mn10300/bitops.h
@@ -0,0 +1,229 @@
+/* MN10300 bit operations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ *
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+#ifndef __ASM_BITOPS_H
+#define __ASM_BITOPS_H
+
+#include <asm/cpu-regs.h>
+
+#define smp_mb__before_clear_bit()	barrier()
+#define smp_mb__after_clear_bit()	barrier()
+
+/*
+ * set bit
+ */
+#define __set_bit(nr, addr)					\
+({								\
+	volatile unsigned char *_a = (unsigned char *)(addr);	\
+	const unsigned shift = (nr) & 7;			\
+	_a += (nr) >> 3;					\
+								\
+	asm volatile("bset %2,(%1) # set_bit reg"		\
+		     : "=m"(*_a)				\
+		     : "a"(_a), "d"(1 << shift),  "m"(*_a)	\
+		     : "memory", "cc");				\
+})
+
+#define set_bit(nr, addr) __set_bit((nr), (addr))
+
+/*
+ * clear bit
+ */
+#define ___clear_bit(nr, addr)					\
+({								\
+	volatile unsigned char *_a = (unsigned char *)(addr);	\
+	const unsigned shift = (nr) & 7;			\
+	_a += (nr) >> 3;					\
+								\
+	asm volatile("bclr %2,(%1) # clear_bit reg"		\
+		     : "=m"(*_a)				\
+		     : "a"(_a), "d"(1 << shift), "m"(*_a)	\
+		     : "memory", "cc");				\
+})
+
+#define clear_bit(nr, addr) ___clear_bit((nr), (addr))
+
+
+static inline void __clear_bit(int nr, volatile void *addr)
+{
+	unsigned int *a = (unsigned int *) addr;
+	int mask;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+	*a &= ~mask;
+}
+
+/*
+ * test bit
+ */
+static inline int test_bit(int nr, const volatile void *addr)
+{
+	return 1UL & (((const unsigned int *) addr)[nr >> 5] >> (nr & 31));
+}
+
+/*
+ * change bit
+ */
+static inline void __change_bit(int nr, volatile void *addr)
+{
+	int	mask;
+	unsigned int *a = (unsigned int *) addr;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+	*a ^= mask;
+}
+
+extern void change_bit(int nr, volatile void *addr);
+
+/*
+ * test and set bit
+ */
+#define __test_and_set_bit(nr,addr)				\
+({								\
+	volatile unsigned char *_a = (unsigned char *)(addr);	\
+	const unsigned shift = (nr) & 7;			\
+	unsigned epsw;						\
+	_a += (nr) >> 3;					\
+								\
+	asm volatile("bset %3,(%2) # test_set_bit reg\n"	\
+		     "mov epsw,%1"				\
+		     : "=m"(*_a), "=d"(epsw)			\
+		     : "a"(_a), "d"(1 << shift), "m"(*_a)	\
+		     : "memory", "cc");				\
+								\
+	!(epsw & EPSW_FLAG_Z);					\
+})
+
+#define test_and_set_bit(nr, addr) __test_and_set_bit((nr), (addr))
+
+/*
+ * test and clear bit
+ */
+#define __test_and_clear_bit(nr, addr)				\
+({								\
+        volatile unsigned char *_a = (unsigned char *)(addr);	\
+	const unsigned shift = (nr) & 7;			\
+	unsigned epsw;						\
+	_a += (nr) >> 3;					\
+								\
+	asm volatile("bclr %3,(%2) # test_clear_bit reg\n"	\
+		     "mov epsw,%1"				\
+		     : "=m"(*_a), "=d"(epsw)			\
+		     : "a"(_a), "d"(1 << shift), "m"(*_a)	\
+		     : "memory", "cc");				\
+								\
+	!(epsw & EPSW_FLAG_Z);					\
+})
+
+#define test_and_clear_bit(nr, addr) __test_and_clear_bit((nr), (addr))
+
+/*
+ * test and change bit
+ */
+static inline int __test_and_change_bit(int nr, volatile void *addr)
+{
+	int	mask, retval;
+	unsigned int *a = (unsigned int *)addr;
+
+	a += nr >> 5;
+	mask = 1 << (nr & 0x1f);
+	retval = (mask & *a) != 0;
+	*a ^= mask;
+
+	return retval;
+}
+
+extern int test_and_change_bit(int nr, volatile void *addr);
+
+#include <asm-generic/bitops/lock.h>
+
+#ifdef __KERNEL__
+
+/**
+ * __ffs - find first bit set
+ * @x: the word to search
+ *
+ * - return 31..0 to indicate bit 31..0 most least significant bit set
+ * - if no bits are set in x, the result is undefined
+ */
+static inline __attribute__((const))
+unsigned long __ffs(unsigned long x)
+{
+	int bit;
+	asm("bsch %2,%0" : "=r"(bit) : "0"(0), "r"(x & -x));
+	return bit;
+}
+
+/*
+ * special slimline version of fls() for calculating ilog2_u32()
+ * - note: no protection against n == 0
+ */
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+	int bit;
+	asm("bsch %2,%0" : "=r"(bit) : "0"(0), "r"(n));
+	return bit;
+}
+
+/**
+ * fls - find last bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs:
+ * - return 32..1 to indicate bit 31..0 most significant bit set
+ * - return 0 to indicate no bits set
+ */
+static inline __attribute__((const))
+int fls(int x)
+{
+	return (x != 0) ? __ilog2_u32(x) + 1 : 0;
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * - return 32..1 to indicate bit 31..0 most least significant bit set
+ * - return 0 to indicate no bits set
+ */
+static inline __attribute__((const))
+int ffs(int x)
+{
+	/* Note: (x & -x) gives us a mask that is the least significant
+	 * (rightmost) 1-bit of the value in x.
+	 */
+	return fls(x & -x);
+}
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/hweight.h>
+
+#define ext2_set_bit_atomic(lock, nr, addr) \
+	test_and_set_bit((nr) ^ 0x18, (addr))
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+	test_and_clear_bit((nr) ^ 0x18, (addr))
+
+#include <asm-generic/bitops/ext2-non-atomic.h>
+#include <asm-generic/bitops/minix-le.h>
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_BITOPS_H */
diff --git a/include/asm-mn10300/bug.h b/include/asm-mn10300/bug.h
new file mode 100644
index 000000000000..4fcf3384e259
--- /dev/null
+++ b/include/asm-mn10300/bug.h
@@ -0,0 +1,35 @@
+/* MN10300 Kernel bug reporting
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_BUG_H
+#define _ASM_BUG_H
+
+/*
+ * Tell the user there is some problem.
+ */
+#define _debug_bug_trap()					\
+do {								\
+	asm volatile(						\
+		"	syscall 15			\n"	\
+		"0:					\n"	\
+		"	.section __bug_table,\"a\"	\n"	\
+		"	.long 0b,%0,%1			\n"	\
+		"	.previous			\n"	\
+		:						\
+		: "i"(__FILE__), "i"(__LINE__)			\
+		);						\
+} while (0)
+
+#define BUG() _debug_bug_trap()
+
+#define HAVE_ARCH_BUG
+#include <asm-generic/bug.h>
+
+#endif /* _ASM_BUG_H */
diff --git a/include/asm-mn10300/bugs.h b/include/asm-mn10300/bugs.h
new file mode 100644
index 000000000000..31c8bc592b47
--- /dev/null
+++ b/include/asm-mn10300/bugs.h
@@ -0,0 +1,20 @@
+/* MN10300 Checks for architecture-dependent bugs
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_BUGS_H
+#define _ASM_BUGS_H
+
+#include <asm/processor.h>
+
+static inline void __init check_bugs(void)
+{
+}
+
+#endif /* _ASM_BUGS_H */
diff --git a/include/asm-mn10300/busctl-regs.h b/include/asm-mn10300/busctl-regs.h
new file mode 100644
index 000000000000..1632aef73401
--- /dev/null
+++ b/include/asm-mn10300/busctl-regs.h
@@ -0,0 +1,151 @@
+/* AM33v2 on-board bus controller registers
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_BUSCTL_REGS_H
+#define _ASM_BUSCTL_REGS_H
+
+#include <asm/cpu-regs.h>
+
+#ifdef __KERNEL__
+
+/* bus controller registers */
+#define BCCR			__SYSREG(0xc0002000, u32)	/* bus controller control reg */
+#define BCCR_B0AD		0x00000003	/* block 0 (80000000-83ffffff) bus allocation */
+#define BCCR_B1AD		0x0000000c	/* block 1 (84000000-87ffffff) bus allocation */
+#define BCCR_B2AD		0x00000030	/* block 2 (88000000-8bffffff) bus allocation */
+#define BCCR_B3AD		0x000000c0	/* block 3 (8c000000-8fffffff) bus allocation */
+#define BCCR_B4AD		0x00000300	/* block 4 (90000000-93ffffff) bus allocation */
+#define BCCR_B5AD		0x00000c00	/* block 5 (94000000-97ffffff) bus allocation */
+#define BCCR_B6AD		0x00003000	/* block 6 (98000000-9bffffff) bus allocation */
+#define BCCR_B7AD		0x0000c000	/* block 7 (9c000000-9fffffff) bus allocation */
+#define BCCR_BxAD_EXBUS		0x0		/* - direct to system bus controller */
+#define BCCR_BxAD_OPEXBUS	0x1		/* - direct to memory bus controller */
+#define BCCR_BxAD_OCMBUS	0x2		/* - direct to on chip memory */
+#define BCCR_API		0x00070000	/* bus arbitration priority */
+#define BCCR_API_DMACICD	0x00000000	/* - DMA > CI > CD */
+#define BCCR_API_DMACDCI	0x00010000	/* - DMA > CD > CI */
+#define BCCR_API_CICDDMA	0x00020000	/* - CI > CD > DMA */
+#define BCCR_API_CDCIDMA	0x00030000	/* - CD > CI > DMA */
+#define BCCR_API_ROUNDROBIN	0x00040000	/* - round robin */
+#define BCCR_BEPRI_DMACICD	0x00c00000	/* bus error address priority */
+#define BCCR_BEPRI_DMACDCI	0x00000000	/* - DMA > CI > CD */
+#define BCCR_BEPRI_CICDDMA	0x00400000	/* - DMA > CD > CI */
+#define BCCR_BEPRI_CDCIDMA	0x00800000	/* - CI > CD > DMA */
+#define BCCR_BEPRI		0x00c00000	/* - CD > CI > DMA */
+#define BCCR_TMON		0x03000000	/* timeout value settings */
+#define BCCR_TMON_16IOCLK	0x00000000	/* - 16 IOCLK cycles */
+#define BCCR_TMON_256IOCLK	0x01000000	/* - 256 IOCLK cycles */
+#define BCCR_TMON_4096IOCLK	0x02000000	/* - 4096 IOCLK cycles */
+#define BCCR_TMON_65536IOCLK	0x03000000	/* - 65536 IOCLK cycles */
+#define BCCR_TMOE		0x10000000	/* timeout detection enable */
+
+#define BCBERR			__SYSREG(0xc0002010, u32)	/* bus error source reg */
+#define BCBERR_BESB		0x0000001f	/* erroneous access destination space */
+#define BCBERR_BESB_MON		0x00000001	/* - monitor space */
+#define BCBERR_BESB_IO		0x00000002	/* - IO bus */
+#define BCBERR_BESB_EX		0x00000004	/* - EX bus */
+#define BCBERR_BESB_OPEX	0x00000008	/* - OpEX bus */
+#define BCBERR_BESB_OCM		0x00000010	/* - on chip memory */
+#define BCBERR_BERW		0x00000100	/* type of access */
+#define BCBERR_BERW_WRITE	0x00000000	/* - write */
+#define BCBERR_BERW_READ	0x00000100	/* - read */
+#define BCBERR_BESD		0x00000200	/* error detector */
+#define BCBERR_BESD_BCU		0x00000000	/* - BCU detected error */
+#define BCBERR_BESD_SLAVE_BUS	0x00000200	/* - slave bus detected error */
+#define BCBERR_BEBST		0x00000400	/* type of access */
+#define BCBERR_BEBST_SINGLE	0x00000000	/* - single */
+#define BCBERR_BEBST_BURST	0x00000400	/* - burst */
+#define BCBERR_BEME		0x00000800	/* multiple bus error flag */
+#define BCBERR_BEMR		0x00007000	/* master bus that caused the error */
+#define BCBERR_BEMR_NOERROR	0x00000000	/* - no error */
+#define BCBERR_BEMR_CI		0x00001000	/* - CPU instruction fetch bus caused error */
+#define BCBERR_BEMR_CD		0x00002000	/* - CPU data bus caused error */
+#define BCBERR_BEMR_DMA		0x00004000	/* - DMA bus caused error */
+
+#define BCBEAR			__SYSREGC(0xc0002020, u32)	/* bus error address reg */
+
+/* system bus controller registers */
+#define SBBASE(X)		__SYSREG(0xd8c00100 + (X) * 0x10, u32)	/* SBC base addr regs */
+#define SBBASE_BE		0x00000001	/* bank enable */
+#define SBBASE_BAM		0x0000fffe	/* bank address mask [31:17] */
+#define SBBASE_BBA		0xfffe0000	/* bank base address [31:17] */
+
+#define SBCNTRL0(X)		__SYSREG(0xd8c00200 + (X) * 0x10, u32)	/* SBC bank ctrl0 regs */
+#define SBCNTRL0_WEH		0x00000f00	/* write enable hold */
+#define SBCNTRL0_REH		0x0000f000	/* read enable hold */
+#define SBCNTRL0_RWH		0x000f0000	/* SRW signal hold */
+#define SBCNTRL0_CSH		0x00f00000	/* chip select hold */
+#define SBCNTRL0_DAH		0x0f000000	/* data hold */
+#define SBCNTRL0_ADH		0xf0000000	/* address hold */
+
+#define SBCNTRL1(X)		__SYSREG(0xd8c00204 + (X) * 0x10, u32)	/* SBC bank ctrl1 regs */
+#define SBCNTRL1_WED		0x00000f00	/* write enable delay */
+#define SBCNTRL1_RED		0x0000f000	/* read enable delay */
+#define SBCNTRL1_RWD		0x000f0000	/* SRW signal delay */
+#define SBCNTRL1_ASW		0x00f00000	/* address strobe width */
+#define SBCNTRL1_CSD		0x0f000000	/* chip select delay */
+#define SBCNTRL1_ASD		0xf0000000	/* address strobe delay */
+
+#define SBCNTRL2(X)		__SYSREG(0xd8c00208 + (X) * 0x10, u32)	/* SBC bank ctrl2 regs */
+#define SBCNTRL2_WC		0x000000ff	/* wait count */
+#define SBCNTRL2_BWC		0x00000f00	/* burst wait count */
+#define SBCNTRL2_WM		0x01000000	/* wait mode setting */
+#define SBCNTRL2_WM_FIXEDWAIT	0x00000000	/* - fixed wait access */
+#define SBCNTRL2_WM_HANDSHAKE	0x01000000	/* - handshake access */
+#define SBCNTRL2_BM		0x02000000	/* bus synchronisation mode */
+#define SBCNTRL2_BM_SYNC	0x00000000	/* - synchronous mode */
+#define SBCNTRL2_BM_ASYNC	0x02000000	/* - asynchronous mode */
+#define SBCNTRL2_BW		0x04000000	/* bus width */
+#define SBCNTRL2_BW_32		0x00000000	/* - 32 bits */
+#define SBCNTRL2_BW_16		0x04000000	/* - 16 bits */
+#define SBCNTRL2_RWINV		0x08000000	/* R/W signal invert polarity */
+#define SBCNTRL2_RWINV_NORM	0x00000000	/* - normal (read high) */
+#define SBCNTRL2_RWINV_INV	0x08000000	/* - inverted (read low) */
+#define SBCNTRL2_BT		0x70000000	/* bus type setting */
+#define SBCNTRL2_BT_SRAM	0x00000000	/* - SRAM interface */
+#define SBCNTRL2_BT_ADMUX	0x00000000	/* - addr/data multiplexed interface */
+#define SBCNTRL2_BT_BROM	0x00000000	/* - burst ROM interface */
+#define SBCNTRL2_BTSE		0x80000000	/* burst enable */
+
+/* memory bus controller */
+#define SDBASE(X)		__SYSREG(0xda000008 + (X) * 0x4, u32)	/* MBC base addr regs */
+#define SDBASE_CE		0x00000001	/* chip enable */
+#define SDBASE_CBAM		0x0000fff0	/* chip base address mask [31:20] */
+#define SDBASE_CBAM_SHIFT	16
+#define SDBASE_CBA		0xfff00000	/* chip base address [31:20] */
+
+#define SDRAMBUS		__SYSREG(0xda000000, u32)	/* bus mode control reg */
+#define SDRAMBUS_REFEN		0x00000004	/* refresh enable */
+#define SDRAMBUS_TRC		0x00000018	/* refresh command delay time */
+#define SDRAMBUS_BSTPT		0x00000020	/* burst stop command enable */
+#define SDRAMBUS_PONSEQ		0x00000040	/* power on sequence */
+#define SDRAMBUS_SELFREQ	0x00000080	/* self-refresh mode request */
+#define SDRAMBUS_SELFON		0x00000100	/* self-refresh mode on */
+#define SDRAMBUS_SIZE		0x00030000	/* SDRAM size */
+#define SDRAMBUS_SIZE_64Mbit	0x00010000	/* 64Mbit SDRAM (x16) */
+#define SDRAMBUS_SIZE_128Mbit	0x00020000	/* 128Mbit SDRAM (x16) */
+#define SDRAMBUS_SIZE_256Mbit	0x00030000	/* 256Mbit SDRAM (x16) */
+#define SDRAMBUS_TRASWAIT	0x000c0000	/* row address precharge command cycle number */
+#define SDRAMBUS_REFNUM		0x00300000	/* refresh command number */
+#define SDRAMBUS_BSTWAIT	0x00c00000	/* burst stop command cycle */
+#define SDRAMBUS_SETWAIT	0x03000000	/* mode register setting command cycle */
+#define SDRAMBUS_PREWAIT	0x0c000000	/* precharge command cycle */
+#define SDRAMBUS_RASLATE	0x30000000	/* RAS latency */
+#define SDRAMBUS_CASLATE	0xc0000000	/* CAS latency */
+
+#define SDREFCNT		__SYSREG(0xda000004, u32)	/* refresh period reg */
+#define SDREFCNT_PERI		0x00000fff	/* refresh period */
+
+#define SDSHDW			__SYSREG(0xda000010, u32)	/* test reg */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_BUSCTL_REGS_H */
diff --git a/include/asm-mn10300/byteorder.h b/include/asm-mn10300/byteorder.h
new file mode 100644
index 000000000000..3c993cc625f8
--- /dev/null
+++ b/include/asm-mn10300/byteorder.h
@@ -0,0 +1,46 @@
+/* MN10300 Byte-order primitive construction
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_BYTEORDER_H
+#define _ASM_BYTEORDER_H
+
+#include <asm/types.h>
+
+#ifdef __GNUC__
+
+static inline __attribute__((const))
+__u32 ___arch__swab32(__u32 x)
+{
+	__u32 ret;
+	asm("swap %1,%0" : "=r" (ret) : "r" (x));
+	return ret;
+}
+
+static inline __attribute__((const))
+__u16 ___arch__swab16(__u16 x)
+{
+	__u16 ret;
+	asm("swaph %1,%0" : "=r" (ret) : "r" (x));
+	return ret;
+}
+
+#define __arch__swab32(x) ___arch__swab32(x)
+#define __arch__swab16(x) ___arch__swab16(x)
+
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+#  define __BYTEORDER_HAS_U64__
+#  define __SWAB_64_THRU_32__
+#endif
+
+#endif /* __GNUC__ */
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _ASM_BYTEORDER_H */
diff --git a/include/asm-mn10300/cache.h b/include/asm-mn10300/cache.h
new file mode 100644
index 000000000000..9e01122208a9
--- /dev/null
+++ b/include/asm-mn10300/cache.h
@@ -0,0 +1,54 @@
+/* MN10300 cache management registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_CACHE_H
+#define _ASM_CACHE_H
+
+#include <asm/cpu-regs.h>
+#include <asm/proc/cache.h>
+
+#ifndef __ASSEMBLY__
+#define L1_CACHE_DISPARITY	(L1_CACHE_NENTRIES * L1_CACHE_BYTES)
+#else
+#define L1_CACHE_DISPARITY	L1_CACHE_NENTRIES * L1_CACHE_BYTES
+#endif
+
+/* data cache purge registers
+ * - read from the register to unconditionally purge that cache line
+ * - write address & 0xffffff00 to conditionally purge that cache line
+ *   - clear LSB to request invalidation as well
+ */
+#define DCACHE_PURGE(WAY, ENTRY) \
+	__SYSREG(0xc8400000 + (WAY) * L1_CACHE_WAYDISP + \
+		 (ENTRY) * L1_CACHE_BYTES, u32)
+
+#define DCACHE_PURGE_WAY0(ENTRY) \
+	__SYSREG(0xc8400000 + 0 * L1_CACHE_WAYDISP + (ENTRY) * L1_CACHE_BYTES, u32)
+#define DCACHE_PURGE_WAY1(ENTRY) \
+	__SYSREG(0xc8400000 + 1 * L1_CACHE_WAYDISP + (ENTRY) * L1_CACHE_BYTES, u32)
+#define DCACHE_PURGE_WAY2(ENTRY) \
+	__SYSREG(0xc8400000 + 2 * L1_CACHE_WAYDISP + (ENTRY) * L1_CACHE_BYTES, u32)
+#define DCACHE_PURGE_WAY3(ENTRY) \
+	__SYSREG(0xc8400000 + 3 * L1_CACHE_WAYDISP + (ENTRY) * L1_CACHE_BYTES, u32)
+
+/* instruction cache access registers */
+#define ICACHE_DATA(WAY, ENTRY, OFF) \
+	__SYSREG(0xc8000000 + (WAY) * L1_CACHE_WAYDISP + (ENTRY) * 0x10 + (OFF) * 4, u32)
+#define ICACHE_TAG(WAY, ENTRY)	 \
+	__SYSREG(0xc8100000 + (WAY) * L1_CACHE_WAYDISP + (ENTRY) * 0x10, u32)
+
+/* instruction cache access registers */
+#define DCACHE_DATA(WAY, ENTRY, OFF) \
+	__SYSREG(0xc8200000 + (WAY) * L1_CACHE_WAYDISP + (ENTRY) * 0x10 + (OFF) * 4, u32)
+#define DCACHE_TAG(WAY, ENTRY)	 \
+	__SYSREG(0xc8300000 + (WAY) * L1_CACHE_WAYDISP + (ENTRY) * 0x10, u32)
+
+#endif /* _ASM_CACHE_H */
diff --git a/include/asm-mn10300/cacheflush.h b/include/asm-mn10300/cacheflush.h
new file mode 100644
index 000000000000..2db746a251f8
--- /dev/null
+++ b/include/asm-mn10300/cacheflush.h
@@ -0,0 +1,116 @@
+/* MN10300 Cache flushing
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_CACHEFLUSH_H
+#define _ASM_CACHEFLUSH_H
+
+#ifndef __ASSEMBLY__
+
+/* Keep includes the same across arches.  */
+#include <linux/mm.h>
+
+/*
+ * virtually-indexed cache managment (our cache is physically indexed)
+ */
+#define flush_cache_all()			do {} while (0)
+#define flush_cache_mm(mm)			do {} while (0)
+#define flush_cache_dup_mm(mm)			do {} while (0)
+#define flush_cache_range(mm, start, end)	do {} while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do {} while (0)
+#define flush_cache_vmap(start, end)		do {} while (0)
+#define flush_cache_vunmap(start, end)		do {} while (0)
+#define flush_dcache_page(page)			do {} while (0)
+#define flush_dcache_mmap_lock(mapping)		do {} while (0)
+#define flush_dcache_mmap_unlock(mapping)	do {} while (0)
+
+/*
+ * physically-indexed cache managment
+ */
+#ifndef CONFIG_MN10300_CACHE_DISABLED
+
+extern void flush_icache_range(unsigned long start, unsigned long end);
+extern void flush_icache_page(struct vm_area_struct *vma, struct page *pg);
+
+#else
+
+#define flush_icache_range(start, end)		do {} while (0)
+#define flush_icache_page(vma, pg)		do {} while (0)
+
+#endif
+
+#define flush_icache_user_range(vma, pg, adr, len) \
+	flush_icache_range(adr, adr + len)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+	do {					\
+		memcpy(dst, src, len);		\
+		flush_icache_page(vma, page);	\
+	} while (0)
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+
+/*
+ * primitive routines
+ */
+#ifndef CONFIG_MN10300_CACHE_DISABLED
+extern void mn10300_icache_inv(void);
+extern void mn10300_dcache_inv(void);
+extern void mn10300_dcache_inv_page(unsigned start);
+extern void mn10300_dcache_inv_range(unsigned start, unsigned end);
+extern void mn10300_dcache_inv_range2(unsigned start, unsigned size);
+#ifdef CONFIG_MN10300_CACHE_WBACK
+extern void mn10300_dcache_flush(void);
+extern void mn10300_dcache_flush_page(unsigned start);
+extern void mn10300_dcache_flush_range(unsigned start, unsigned end);
+extern void mn10300_dcache_flush_range2(unsigned start, unsigned size);
+extern void mn10300_dcache_flush_inv(void);
+extern void mn10300_dcache_flush_inv_page(unsigned start);
+extern void mn10300_dcache_flush_inv_range(unsigned start, unsigned end);
+extern void mn10300_dcache_flush_inv_range2(unsigned start, unsigned size);
+#else
+#define mn10300_dcache_flush()				do {} while (0)
+#define mn10300_dcache_flush_page(start)		do {} while (0)
+#define mn10300_dcache_flush_range(start, end)		do {} while (0)
+#define mn10300_dcache_flush_range2(start, size)	do {} while (0)
+#define mn10300_dcache_flush_inv()			mn10300_dcache_inv()
+#define mn10300_dcache_flush_inv_page(start) \
+	mn10300_dcache_inv_page((start))
+#define mn10300_dcache_flush_inv_range(start, end) \
+	mn10300_dcache_inv_range((start), (end))
+#define mn10300_dcache_flush_inv_range2(start, size) \
+	mn10300_dcache_inv_range2((start), (size))
+#endif /* CONFIG_MN10300_CACHE_WBACK */
+#else
+#define mn10300_icache_inv()				do {} while (0)
+#define mn10300_dcache_inv()				do {} while (0)
+#define mn10300_dcache_inv_page(start)			do {} while (0)
+#define mn10300_dcache_inv_range(start, end)		do {} while (0)
+#define mn10300_dcache_inv_range2(start, size)		do {} while (0)
+#define mn10300_dcache_flush()				do {} while (0)
+#define mn10300_dcache_flush_inv_page(start)		do {} while (0)
+#define mn10300_dcache_flush_inv()			do {} while (0)
+#define mn10300_dcache_flush_inv_range(start, end)	do {} while (0)
+#define mn10300_dcache_flush_inv_range2(start, size)	do {} while (0)
+#define mn10300_dcache_flush_page(start)		do {} while (0)
+#define mn10300_dcache_flush_range(start, end)		do {} while (0)
+#define mn10300_dcache_flush_range2(start, size)	do {} while (0)
+#endif /* CONFIG_MN10300_CACHE_DISABLED */
+
+/*
+ * internal debugging function
+ */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern void kernel_map_pages(struct page *page, int numpages, int enable);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_CACHEFLUSH_H */
diff --git a/include/asm-mn10300/checksum.h b/include/asm-mn10300/checksum.h
new file mode 100644
index 000000000000..9fb2a8d8826a
--- /dev/null
+++ b/include/asm-mn10300/checksum.h
@@ -0,0 +1,86 @@
+/* MN10300 Optimised checksumming code
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_CHECKSUM_H
+#define _ASM_CHECKSUM_H
+
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
+					int len, __wsum sum);
+extern __wsum csum_partial_copy_from_user(const void *src, void *dst,
+					  int len, __wsum sum,
+					  int *err_ptr);
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#define csum_partial_copy_fromuser csum_partial_copy
+extern __wsum csum_partial_copy(const void *src, void *dst, int len,
+				__wsum sum);
+
+static inline __sum16 csum_fold(__wsum sum)
+{
+	asm(
+		"	add	%1,%0		\n"
+		"	addc	0xffff,%0	\n"
+		: "=r" (sum)
+		: "r" (sum << 16), "0" (sum & 0xffff0000)
+		: "cc"
+	    );
+	return (~sum) >> 16;
+}
+
+static inline __wsum csum_tcpudp_nofold(unsigned long saddr,
+					unsigned long daddr,
+					unsigned short len,
+					unsigned short proto,
+					__wsum sum)
+{
+	__wsum tmp;
+
+	tmp = (__wsum) ntohs(len) << 16;
+	tmp += (__wsum) proto << 8;
+
+	asm(
+		"	add	%1,%0		\n"
+		"	addc	%2,%0		\n"
+		"	addc	%3,%0		\n"
+		"	addc	0,%0		\n"
+		: "=r" (sum)
+		: "r" (daddr), "r"(saddr), "r"(tmp), "0"(sum)
+		: "cc"
+	    );
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(unsigned long saddr,
+					unsigned long daddr,
+					unsigned short len,
+					unsigned short proto,
+					__wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+#undef _HAVE_ARCH_IPV6_CSUM
+
+/*
+ *	Copy and checksum to user
+ */
+#define HAVE_CSUM_COPY_USER
+extern __wsum csum_and_copy_to_user(const void *src, void *dst, int len,
+				    __wsum sum, int *err_ptr);
+
+
+#endif /* _ASM_CHECKSUM_H */
diff --git a/include/asm-mn10300/cpu-regs.h b/include/asm-mn10300/cpu-regs.h
new file mode 100644
index 000000000000..757e9b5388ea
--- /dev/null
+++ b/include/asm-mn10300/cpu-regs.h
@@ -0,0 +1,290 @@
+/* MN10300 Core system registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_CPU_REGS_H
+#define _ASM_CPU_REGS_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+
+#ifdef CONFIG_MN10300_CPU_AM33V2
+/* we tell the compiler to pretend to be AM33 so that it doesn't try and use
+ * the FP regs, but tell the assembler that we're actually allowed AM33v2
+ * instructions */
+#ifndef __ASSEMBLY__
+asm(" .am33_2\n");
+#else
+.am33_2
+#endif
+#endif
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+#define __SYSREG(ADDR, TYPE) (*(volatile TYPE *)(ADDR))
+#define __SYSREGC(ADDR, TYPE) (*(const volatile TYPE *)(ADDR))
+#else
+#define __SYSREG(ADDR, TYPE) ADDR
+#define __SYSREGC(ADDR, TYPE) ADDR
+#endif
+
+/* CPU registers */
+#define EPSW_FLAG_Z		0x00000001	/* zero flag */
+#define EPSW_FLAG_N		0x00000002	/* negative flag */
+#define EPSW_FLAG_C		0x00000004	/* carry flag */
+#define EPSW_FLAG_V		0x00000008	/* overflow flag */
+#define EPSW_IM			0x00000700	/* interrupt mode */
+#define EPSW_IM_0		0x00000000	/* interrupt mode 0 */
+#define EPSW_IM_1		0x00000100	/* interrupt mode 1 */
+#define EPSW_IM_2		0x00000200	/* interrupt mode 2 */
+#define EPSW_IM_3		0x00000300	/* interrupt mode 3 */
+#define EPSW_IM_4		0x00000400	/* interrupt mode 4 */
+#define EPSW_IM_5		0x00000500	/* interrupt mode 5 */
+#define EPSW_IM_6		0x00000600	/* interrupt mode 6 */
+#define EPSW_IM_7		0x00000700	/* interrupt mode 7 */
+#define EPSW_IE			0x00000800	/* interrupt enable */
+#define EPSW_S			0x00003000	/* software auxilliary bits */
+#define EPSW_T			0x00008000	/* trace enable */
+#define EPSW_nSL		0x00010000	/* not supervisor level */
+#define EPSW_NMID		0x00020000	/* nonmaskable interrupt disable */
+#define EPSW_nAR		0x00040000	/* register bank control */
+#define EPSW_ML			0x00080000	/* monitor level */
+#define EPSW_FE			0x00100000	/* FPU enable */
+
+/* FPU registers */
+#define FPCR_EF_I		0x00000001	/* inexact result FPU exception flag */
+#define FPCR_EF_U		0x00000002	/* underflow FPU exception flag */
+#define FPCR_EF_O		0x00000004	/* overflow FPU exception flag */
+#define FPCR_EF_Z		0x00000008	/* zero divide FPU exception flag */
+#define FPCR_EF_V		0x00000010	/* invalid operand FPU exception flag */
+#define FPCR_EE_I		0x00000020	/* inexact result FPU exception enable */
+#define FPCR_EE_U		0x00000040	/* underflow FPU exception enable */
+#define FPCR_EE_O		0x00000080	/* overflow FPU exception enable */
+#define FPCR_EE_Z		0x00000100	/* zero divide FPU exception enable */
+#define FPCR_EE_V		0x00000200	/* invalid operand FPU exception enable */
+#define FPCR_EC_I		0x00000400	/* inexact result FPU exception cause */
+#define FPCR_EC_U		0x00000800	/* underflow FPU exception cause */
+#define FPCR_EC_O		0x00001000	/* overflow FPU exception cause */
+#define FPCR_EC_Z		0x00002000	/* zero divide FPU exception cause */
+#define FPCR_EC_V		0x00004000	/* invalid operand FPU exception cause */
+#define FPCR_RM			0x00030000	/* rounding mode */
+#define FPCR_RM_NEAREST		0x00000000	/* - round to nearest value */
+#define FPCR_FCC_U		0x00040000	/* FPU unordered condition code */
+#define FPCR_FCC_E		0x00080000	/* FPU equal condition code */
+#define FPCR_FCC_G		0x00100000	/* FPU greater than condition code */
+#define FPCR_FCC_L		0x00200000	/* FPU less than condition code */
+#define FPCR_INIT		0x00000000	/* no exceptions, rounding to nearest */
+
+/* CPU control registers */
+#define CPUP			__SYSREG(0xc0000020, u16)	/* CPU pipeline register */
+#define CPUP_DWBD		0x0020		/* write buffer disable flag */
+#define CPUP_IPFD		0x0040		/* instruction prefetch disable flag */
+#define CPUP_EXM		0x0080		/* exception operation mode */
+#define CPUP_EXM_AM33V1		0x0000		/* - AM33 v1 exception mode */
+#define CPUP_EXM_AM33V2		0x0080		/* - AM33 v2 exception mode */
+
+#define CPUM			__SYSREG(0xc0000040, u16)	/* CPU mode register */
+#define CPUM_SLEEP		0x0004		/* set to enter sleep state */
+#define CPUM_HALT		0x0008		/* set to enter halt state */
+#define CPUM_STOP		0x0010		/* set to enter stop state */
+
+#define CPUREV			__SYSREGC(0xc0000050, u32)	/* CPU revision register */
+#define CPUREV_TYPE		0x0000000f	/* CPU type */
+#define CPUREV_TYPE_S		0
+#define CPUREV_TYPE_AM33V1	0x00000000	/* - AM33 V1 core, AM33/1.00 arch */
+#define CPUREV_TYPE_AM33V2	0x00000001	/* - AM33 V2 core, AM33/2.00 arch */
+#define CPUREV_TYPE_AM34V1	0x00000002	/* - AM34 V1 core, AM33/2.00 arch */
+#define CPUREV_REVISION		0x000000f0	/* CPU revision */
+#define CPUREV_REVISION_S	4
+#define CPUREV_ICWAY		0x00000f00	/* number of instruction cache ways */
+#define CPUREV_ICWAY_S		8
+#define CPUREV_ICSIZE		0x0000f000	/* instruction cache way size */
+#define CPUREV_ICSIZE_S		12
+#define CPUREV_DCWAY		0x000f0000	/* number of data cache ways */
+#define CPUREV_DCWAY_S		16
+#define CPUREV_DCSIZE		0x00f00000	/* data cache way size */
+#define CPUREV_DCSIZE_S		20
+#define CPUREV_FPUTYPE		0x0f000000	/* FPU core type */
+#define CPUREV_FPUTYPE_NONE	0x00000000	/* - no FPU core implemented */
+#define CPUREV_OCDCTG		0xf0000000	/* on-chip debug function category */
+
+#define DCR			__SYSREG(0xc0000030, u16)	/* Debug control register */
+
+/* interrupt/exception control registers */
+#define IVAR0			__SYSREG(0xc0000000, u16)	/* interrupt vector 0 */
+#define IVAR1			__SYSREG(0xc0000004, u16)	/* interrupt vector 1 */
+#define IVAR2			__SYSREG(0xc0000008, u16)	/* interrupt vector 2 */
+#define IVAR3			__SYSREG(0xc000000c, u16)	/* interrupt vector 3 */
+#define IVAR4			__SYSREG(0xc0000010, u16)	/* interrupt vector 4 */
+#define IVAR5			__SYSREG(0xc0000014, u16)	/* interrupt vector 5 */
+#define IVAR6			__SYSREG(0xc0000018, u16)	/* interrupt vector 6 */
+
+#define TBR			__SYSREG(0xc0000024, u32)	/* Trap table base */
+#define TBR_TB			0xff000000	/* table base address bits 31-24 */
+#define TBR_INT_CODE		0x00ffffff	/* interrupt code */
+
+#define DEAR			__SYSREG(0xc0000038, u32)	/* Data access exception address */
+
+#define sISR			__SYSREG(0xc0000044, u32)	/* Supervisor interrupt status */
+#define	sISR_IRQICE		0x00000001	/* ICE interrupt */
+#define	sISR_ISTEP		0x00000002	/* single step interrupt */
+#define	sISR_MISSA		0x00000004	/* memory access address misalignment fault */
+#define	sISR_UNIMP		0x00000008	/* unimplemented instruction execution fault */
+#define	sISR_PIEXE		0x00000010	/* program interrupt */
+#define	sISR_MEMERR		0x00000020	/* illegal memory access fault */
+#define	sISR_IBREAK		0x00000040	/* instraction break interrupt */
+#define	sISR_DBSRL		0x00000080	/* debug serial interrupt */
+#define	sISR_PERIDB		0x00000100	/* peripheral debug interrupt */
+#define	sISR_EXUNIMP		0x00000200	/* unimplemented ex-instruction execution fault */
+#define	sISR_OBREAK		0x00000400	/* operand break interrupt */
+#define	sISR_PRIV		0x00000800	/* privileged instruction execution fault */
+#define	sISR_BUSERR		0x00001000	/* bus error fault */
+#define	sISR_DBLFT		0x00002000	/* double fault */
+#define	sISR_DBG		0x00008000	/* debug reserved interrupt */
+#define sISR_ITMISS		0x00010000	/* instruction TLB miss */
+#define sISR_DTMISS		0x00020000	/* data TLB miss */
+#define sISR_ITEX		0x00040000	/* instruction TLB access exception */
+#define sISR_DTEX		0x00080000	/* data TLB access exception */
+#define sISR_ILGIA		0x00100000	/* illegal instruction access exception */
+#define sISR_ILGDA		0x00200000	/* illegal data access exception */
+#define sISR_IOIA		0x00400000	/* internal I/O space instruction access excep */
+#define sISR_PRIVA		0x00800000	/* privileged space instruction access excep */
+#define sISR_PRIDA		0x01000000	/* privileged space data access excep */
+#define sISR_DISA		0x02000000	/* data space instruction access excep */
+#define sISR_SYSC		0x04000000	/* system call instruction excep */
+#define sISR_FPUD		0x08000000	/* FPU disabled excep */
+#define sISR_FPUUI		0x10000000	/* FPU unimplemented instruction excep */
+#define sISR_FPUOP		0x20000000	/* FPU operation excep */
+#define sISR_NE			0x80000000	/* multiple synchronous exceptions excep */
+
+/* cache control registers */
+#define CHCTR			__SYSREG(0xc0000070, u16)	/* cache control */
+#define CHCTR_ICEN		0x0001		/* instruction cache enable */
+#define CHCTR_DCEN		0x0002		/* data cache enable */
+#define CHCTR_ICBUSY		0x0004		/* instruction cache busy */
+#define CHCTR_DCBUSY		0x0008		/* data cache busy */
+#define CHCTR_ICINV		0x0010		/* instruction cache invalidate */
+#define CHCTR_DCINV		0x0020		/* data cache invalidate */
+#define CHCTR_DCWTMD		0x0040		/* data cache writing mode */
+#define CHCTR_DCWTMD_WRBACK	0x0000		/* - write back mode */
+#define CHCTR_DCWTMD_WRTHROUGH	0x0040		/* - write through mode */
+#define CHCTR_DCALMD		0x0080		/* data cache allocation mode */
+#define CHCTR_ICWMD		0x0f00		/* instruction cache way mode */
+#define CHCTR_DCWMD		0xf000		/* data cache way mode */
+
+/* MMU control registers */
+#define MMUCTR			__SYSREG(0xc0000090, u32)	/* MMU control register */
+#define MMUCTR_IRP		0x0000003f	/* instruction TLB replace pointer */
+#define MMUCTR_ITE		0x00000040	/* instruction TLB enable */
+#define MMUCTR_IIV		0x00000080	/* instruction TLB invalidate */
+#define MMUCTR_ITL		0x00000700	/* instruction TLB lock pointer */
+#define MMUCTR_ITL_NOLOCK	0x00000000	/* - no lock */
+#define MMUCTR_ITL_LOCK0	0x00000100	/* - entry 0 locked */
+#define MMUCTR_ITL_LOCK0_1	0x00000200	/* - entry 0-1 locked */
+#define MMUCTR_ITL_LOCK0_3	0x00000300	/* - entry 0-3 locked */
+#define MMUCTR_ITL_LOCK0_7	0x00000400	/* - entry 0-7 locked */
+#define MMUCTR_ITL_LOCK0_15	0x00000500	/* - entry 0-15 locked */
+#define MMUCTR_CE		0x00008000	/* cacheable bit enable */
+#define MMUCTR_DRP		0x003f0000	/* data TLB replace pointer */
+#define MMUCTR_DTE		0x00400000	/* data TLB enable */
+#define MMUCTR_DIV		0x00800000	/* data TLB invalidate */
+#define MMUCTR_DTL		0x07000000	/* data TLB lock pointer */
+#define MMUCTR_DTL_NOLOCK	0x00000000	/* - no lock */
+#define MMUCTR_DTL_LOCK0	0x01000000	/* - entry 0 locked */
+#define MMUCTR_DTL_LOCK0_1	0x02000000	/* - entry 0-1 locked */
+#define MMUCTR_DTL_LOCK0_3	0x03000000	/* - entry 0-3 locked */
+#define MMUCTR_DTL_LOCK0_7	0x04000000	/* - entry 0-7 locked */
+#define MMUCTR_DTL_LOCK0_15	0x05000000	/* - entry 0-15 locked */
+
+#define PIDR			__SYSREG(0xc0000094, u16)	/* PID register */
+#define PIDR_PID		0x00ff		/* process identifier */
+
+#define PTBR			__SYSREG(0xc0000098, unsigned long) /* Page table base register */
+
+#define IPTEL			__SYSREG(0xc00000a0, u32)	/* instruction TLB entry */
+#define DPTEL			__SYSREG(0xc00000b0, u32)	/* data TLB entry */
+#define xPTEL_V			0x00000001	/* TLB entry valid */
+#define xPTEL_UNUSED1		0x00000002	/* unused bit */
+#define xPTEL_UNUSED2		0x00000004	/* unused bit */
+#define xPTEL_C			0x00000008	/* cached if set */
+#define xPTEL_PV		0x00000010	/* page valid */
+#define xPTEL_D			0x00000020	/* dirty */
+#define xPTEL_PR		0x000001c0	/* page protection */
+#define xPTEL_PR_ROK		0x00000000	/* - R/O kernel */
+#define xPTEL_PR_RWK		0x00000100	/* - R/W kernel */
+#define xPTEL_PR_ROK_ROU	0x00000080	/* - R/O kernel and R/O user */
+#define xPTEL_PR_RWK_ROU	0x00000180	/* - R/W kernel and R/O user */
+#define xPTEL_PR_RWK_RWU	0x000001c0	/* - R/W kernel and R/W user */
+#define xPTEL_G			0x00000200	/* global (use PID if 0) */
+#define xPTEL_PS		0x00000c00	/* page size */
+#define xPTEL_PS_4Kb		0x00000000	/* - 4Kb page */
+#define xPTEL_PS_128Kb		0x00000400	/* - 128Kb page */
+#define xPTEL_PS_1Kb		0x00000800	/* - 1Kb page */
+#define xPTEL_PS_4Mb		0x00000c00	/* - 4Mb page */
+#define xPTEL_PPN		0xfffff006	/* physical page number */
+
+#define xPTEL_V_BIT		0	/* bit numbers corresponding to above masks */
+#define xPTEL_UNUSED1_BIT	1
+#define xPTEL_UNUSED2_BIT	2
+#define xPTEL_C_BIT		3
+#define xPTEL_PV_BIT		4
+#define xPTEL_D_BIT		5
+#define xPTEL_G_BIT		9
+
+#define IPTEU			__SYSREG(0xc00000a4, u32)	/* instruction TLB virtual addr */
+#define DPTEU			__SYSREG(0xc00000b4, u32)	/* data TLB virtual addr */
+#define xPTEU_VPN		0xfffffc00	/* virtual page number */
+#define xPTEU_PID		0x000000ff	/* process identifier to which applicable */
+
+#define IPTEL2			__SYSREG(0xc00000a8, u32)	/* instruction TLB entry */
+#define DPTEL2			__SYSREG(0xc00000b8, u32)	/* data TLB entry */
+#define xPTEL2_V		0x00000001	/* TLB entry valid */
+#define xPTEL2_C		0x00000002	/* cacheable */
+#define xPTEL2_PV		0x00000004	/* page valid */
+#define xPTEL2_D		0x00000008	/* dirty */
+#define xPTEL2_PR		0x00000070	/* page protection */
+#define xPTEL2_PR_ROK		0x00000000	/* - R/O kernel */
+#define xPTEL2_PR_RWK		0x00000040	/* - R/W kernel */
+#define xPTEL2_PR_ROK_ROU	0x00000020	/* - R/O kernel and R/O user */
+#define xPTEL2_PR_RWK_ROU	0x00000060	/* - R/W kernel and R/O user */
+#define xPTEL2_PR_RWK_RWU	0x00000070	/* - R/W kernel and R/W user */
+#define xPTEL2_G		0x00000080	/* global (use PID if 0) */
+#define xPTEL2_PS		0x00000300	/* page size */
+#define xPTEL2_PS_4Kb		0x00000000	/* - 4Kb page */
+#define xPTEL2_PS_128Kb		0x00000100	/* - 128Kb page */
+#define xPTEL2_PS_1Kb		0x00000200	/* - 1Kb page */
+#define xPTEL2_PS_4Mb		0x00000300	/* - 4Mb page */
+#define xPTEL2_PPN		0xfffffc00	/* physical page number */
+
+#define MMUFCR			__SYSREGC(0xc000009c, u32)	/* MMU exception cause */
+#define MMUFCR_IFC		__SYSREGC(0xc000009c, u16)	/* MMU instruction excep cause */
+#define MMUFCR_DFC		__SYSREGC(0xc000009e, u16)	/* MMU data exception cause */
+#define MMUFCR_xFC_TLBMISS	0x0001		/* TLB miss flag */
+#define MMUFCR_xFC_INITWR	0x0002		/* initial write excep flag */
+#define MMUFCR_xFC_PGINVAL	0x0004		/* page invalid excep flag */
+#define MMUFCR_xFC_PROTVIOL	0x0008		/* protection violation excep flag */
+#define MMUFCR_xFC_ACCESS	0x0010		/* access level flag */
+#define MMUFCR_xFC_ACCESS_USR	0x0000		/* - user mode */
+#define MMUFCR_xFC_ACCESS_SR	0x0010		/* - supervisor mode */
+#define MMUFCR_xFC_TYPE		0x0020		/* access type flag */
+#define MMUFCR_xFC_TYPE_READ	0x0000		/* - read */
+#define MMUFCR_xFC_TYPE_WRITE	0x0020		/* - write */
+#define MMUFCR_xFC_PR		0x01c0		/* page protection flag */
+#define MMUFCR_xFC_PR_ROK	0x0000		/* - R/O kernel */
+#define MMUFCR_xFC_PR_RWK	0x0100		/* - R/W kernel */
+#define MMUFCR_xFC_PR_ROK_ROU	0x0080		/* - R/O kernel and R/O user */
+#define MMUFCR_xFC_PR_RWK_ROU	0x0180		/* - R/W kernel and R/O user */
+#define MMUFCR_xFC_PR_RWK_RWU	0x01c0		/* - R/W kernel and R/W user */
+#define MMUFCR_xFC_ILLADDR	0x0200		/* illegal address excep flag */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_CPU_REGS_H */
diff --git a/include/asm-mn10300/cputime.h b/include/asm-mn10300/cputime.h
new file mode 100644
index 000000000000..6d68ad7e0ea3
--- /dev/null
+++ b/include/asm-mn10300/cputime.h
@@ -0,0 +1 @@
+#include <asm-generic/cputime.h>
diff --git a/include/asm-mn10300/current.h b/include/asm-mn10300/current.h
new file mode 100644
index 000000000000..ca6027d83743
--- /dev/null
+++ b/include/asm-mn10300/current.h
@@ -0,0 +1,37 @@
+/* MN10300 Current task structure accessor
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_CURRENT_H
+#define _ASM_CURRENT_H
+
+#include <linux/thread_info.h>
+
+/*
+ * dedicate E2 to keeping the current task pointer
+ */
+#ifdef CONFIG_MN10300_CURRENT_IN_E2
+
+register struct task_struct *const current asm("e2") __attribute__((used));
+
+#define get_current() current
+
+extern struct task_struct *__current;
+
+#else
+static inline __attribute__((const))
+struct task_struct *get_current(void)
+{
+	return current_thread_info()->task;
+}
+
+#define current get_current()
+#endif
+
+#endif /* _ASM_CURRENT_H */
diff --git a/include/asm-mn10300/delay.h b/include/asm-mn10300/delay.h
new file mode 100644
index 000000000000..34517b359399
--- /dev/null
+++ b/include/asm-mn10300/delay.h
@@ -0,0 +1,19 @@
+/* MN10300 Uninterruptible delay routines
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_DELAY_H
+#define _ASM_DELAY_H
+
+extern void __udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) __udelay(n)
+
+#endif /* _ASM_DELAY_H */
diff --git a/include/asm-mn10300/device.h b/include/asm-mn10300/device.h
new file mode 100644
index 000000000000..f0a4c256403b
--- /dev/null
+++ b/include/asm-mn10300/device.h
@@ -0,0 +1 @@
+#include <asm-generic/device.h>
diff --git a/include/asm-mn10300/div64.h b/include/asm-mn10300/div64.h
new file mode 100644
index 000000000000..bf9c515a998c
--- /dev/null
+++ b/include/asm-mn10300/div64.h
@@ -0,0 +1,103 @@
+/* MN10300 64-bit division
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_DIV64
+#define _ASM_DIV64
+
+#include <linux/types.h>
+
+extern void ____unhandled_size_in_do_div___(void);
+
+/*
+ * divide n by base, leaving the result in n and returning the remainder
+ * - we can do this quite efficiently on the MN10300 by cascading the divides
+ *   through the MDR register
+ */
+#define do_div(n, base)							\
+({									\
+	unsigned __rem = 0;						\
+	if (sizeof(n) <= 4) {						\
+		asm("mov	%1,mdr	\n"				\
+		    "divu	%2,%0	\n"				\
+		    "mov	mdr,%1	\n"				\
+		    : "+r"(n), "=d"(__rem)				\
+		    : "r"(base), "1"(__rem)				\
+		    : "cc"						\
+		    );							\
+	} else if (sizeof(n) <= 8) {					\
+		union {							\
+			unsigned long long l;				\
+			u32 w[2];					\
+		} __quot;						\
+		__quot.l = n;						\
+		asm("mov	%0,mdr	\n"	/* MDR = 0 */		\
+		    "divu	%3,%1	\n"				\
+		    /* __quot.MSL = __div.MSL / base, */		\
+		    /* MDR = MDR:__div.MSL % base */			\
+		    "divu	%3,%2	\n"				\
+		    /* __quot.LSL = MDR:__div.LSL / base, */		\
+		    /* MDR = MDR:__div.LSL % base */			\
+		    "mov	mdr,%0	\n"				\
+		    : "=d"(__rem), "=r"(__quot.w[1]), "=r"(__quot.w[0])	\
+		    : "r"(base), "0"(__rem), "1"(__quot.w[1]),		\
+		      "2"(__quot.w[0])					\
+		    : "cc"						\
+		    );							\
+		n = __quot.l;						\
+	} else {							\
+		____unhandled_size_in_do_div___();			\
+	}								\
+	__rem;								\
+})
+
+/*
+ * do an unsigned 32-bit multiply and divide with intermediate 64-bit product
+ * so as not to lose accuracy
+ * - we use the MDR register to hold the MSW of the product
+ */
+static inline __attribute__((const))
+unsigned __muldiv64u(unsigned val, unsigned mult, unsigned div)
+{
+	unsigned result;
+
+	asm("mulu	%2,%0	\n"	/* MDR:val = val*mult */
+	    "divu	%3,%0	\n"	/* val = MDR:val/div;
+					 * MDR = MDR:val%div */
+	    : "=r"(result)
+	    : "0"(val), "ir"(mult), "r"(div)
+	    );
+
+	return result;
+}
+
+/*
+ * do a signed 32-bit multiply and divide with intermediate 64-bit product so
+ * as not to lose accuracy
+ * - we use the MDR register to hold the MSW of the product
+ */
+static inline __attribute__((const))
+signed __muldiv64s(signed val, signed mult, signed div)
+{
+	signed result;
+
+	asm("mul	%2,%0	\n"	/* MDR:val = val*mult */
+	    "div	%3,%0	\n"	/* val = MDR:val/div;
+					 * MDR = MDR:val%div */
+	    : "=r"(result)
+	    : "0"(val), "ir"(mult), "r"(div)
+	    );
+
+	return result;
+}
+
+extern __attribute__((const))
+uint64_t div64_64(uint64_t dividend, uint64_t divisor);
+
+#endif /* _ASM_DIV64 */
diff --git a/include/asm-mn10300/dma-mapping.h b/include/asm-mn10300/dma-mapping.h
new file mode 100644
index 000000000000..7c882fca9ec8
--- /dev/null
+++ b/include/asm-mn10300/dma-mapping.h
@@ -0,0 +1,234 @@
+/* DMA mapping routines for the MN10300 arch
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_DMA_MAPPING_H
+#define _ASM_DMA_MAPPING_H
+
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+
+extern void *dma_alloc_coherent(struct device *dev, size_t size,
+				dma_addr_t *dma_handle, int flag);
+
+extern void dma_free_coherent(struct device *dev, size_t size,
+			      void *vaddr, dma_addr_t dma_handle);
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent((d), (s), (h), (f))
+#define dma_free_noncoherent(d, s, v, h)  dma_free_coherent((d), (s), (v), (h))
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode.  The
+ * 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+			  enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+	mn10300_dcache_flush_inv();
+	return virt_to_bus(ptr);
+}
+
+/*
+ * Unmap a single streaming mode DMA translation.  The dma_addr and size must
+ * match what was provided for in a previous pci_map_single call.  All other
+ * usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guarenteed to see
+ * whatever the device wrote there.
+ */
+static inline
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		      enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scather-gather version of the above pci_map_single interface.
+ * Here the scatter gather list elements are each tagged with the appropriate
+ * dma address and length.  They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of DMA
+ *       address/length pairs than there are SG table elements.  (for example
+ *       via virtual mapping capabilities) The routine returns the number of
+ *       addr/length pairs actually used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are the same
+ * here.
+ */
+static inline
+int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+	       enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nents == 0 || sglist[0].length == 0);
+
+	for_each_sg(sglist, sg, nents, i) {
+		BUG_ON(!sg_page(sg));
+
+		sg->dma_address = sg_phys(sg);
+	}
+
+	mn10300_dcache_flush_inv();
+	return nents;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+		  enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+
+/*
+ * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical
+ * to pci_map_single, but takes a struct page instead of a virtual address
+ */
+static inline
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+			unsigned long offset, size_t size,
+			enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+	return page_to_bus(page) + offset;
+}
+
+static inline
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+		    enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the buffer using
+ * the cpu, yet do not wish to teardown the PCI dma mapping, you must call this
+ * function before doing so.  At the next point you give the PCI dma address
+ * back to the card, the device again owns the buffer.
+ */
+static inline
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			     size_t size, enum dma_data_direction direction)
+{
+}
+
+static inline
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+				size_t size, enum dma_data_direction direction)
+{
+	mn10300_dcache_flush_inv();
+}
+
+static inline
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				   unsigned long offset, size_t size,
+				   enum dma_data_direction direction)
+{
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction direction)
+{
+	mn10300_dcache_flush_inv();
+}
+
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list, same rules
+ * and usage.
+ */
+static inline
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			 int nelems, enum dma_data_direction direction)
+{
+}
+
+static inline
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			    int nelems, enum dma_data_direction direction)
+{
+	mn10300_dcache_flush_inv();
+}
+
+static inline
+int dma_mapping_error(dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+/*
+ * Return whether the given PCI device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits during
+ * PCI bus mastering, then you would pass 0x00ffffff as the mask to this
+ * function.
+ */
+static inline
+int dma_supported(struct device *dev, u64 mask)
+{
+	/*
+	 * we fall back to GFP_DMA when the mask isn't all 1s, so we can't
+	 * guarantee allocations that must be within a tighter range than
+	 * GFP_DMA
+	 */
+	if (mask < 0x00ffffff)
+		return 0;
+	return 1;
+}
+
+static inline
+int dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+	return 0;
+}
+
+static inline
+int dma_get_cache_alignment(void)
+{
+	return 1 << L1_CACHE_SHIFT;
+}
+
+#define dma_is_consistent(d)	(1)
+
+static inline
+void dma_cache_sync(void *vaddr, size_t size,
+		    enum dma_data_direction direction)
+{
+	mn10300_dcache_flush_inv();
+}
+
+#endif
diff --git a/include/asm-mn10300/dma.h b/include/asm-mn10300/dma.h
new file mode 100644
index 000000000000..098df2e617ab
--- /dev/null
+++ b/include/asm-mn10300/dma.h
@@ -0,0 +1,118 @@
+/* MN10300 ISA DMA handlers and definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_DMA_H
+#define _ASM_DMA_H
+
+#include <asm/system.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+#include <linux/delay.h>
+
+#undef MAX_DMA_CHANNELS		/* switch off linux/kernel/dma.c */
+#define MAX_DMA_ADDRESS		0xbfffffff
+
+extern spinlock_t dma_spin_lock;
+
+static inline unsigned long claim_dma_lock(void)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dma_spin_lock, flags);
+	return flags;
+}
+
+static inline void release_dma_lock(unsigned long flags)
+{
+	spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+/* enable/disable a specific DMA channel */
+static inline void enable_dma(unsigned int dmanr)
+{
+}
+
+static inline void disable_dma(unsigned int dmanr)
+{
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static inline void clear_dma_ff(unsigned int dmanr)
+{
+}
+
+/* set mode (above) for a specific DMA channel */
+static inline void set_dma_mode(unsigned int dmanr, char mode)
+{
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static inline void set_dma_page(unsigned int dmanr, char pagenr)
+{
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static inline void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+}
+
+
+/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static inline void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static inline int get_dma_residue(unsigned int dmanr)
+{
+	return 0;
+}
+
+
+/* These are in kernel/dma.c: */
+extern int request_dma(unsigned int dmanr, const char *device_id);
+extern void free_dma(unsigned int dmanr);
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy 	(0)
+#endif
+
+#endif /* _ASM_DMA_H */
diff --git a/include/asm-mn10300/dmactl-regs.h b/include/asm-mn10300/dmactl-regs.h
new file mode 100644
index 000000000000..58a199da0f4a
--- /dev/null
+++ b/include/asm-mn10300/dmactl-regs.h
@@ -0,0 +1,101 @@
+/* MN10300 on-board DMA controller registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_DMACTL_REGS_H
+#define _ASM_DMACTL_REGS_H
+
+#include <asm/cpu-regs.h>
+
+#ifdef __KERNEL__
+
+/* DMA registers */
+#define	DMxCTR(N)		__SYSREG(0xd2000000 + ((N) * 0x100), u32)	/* control reg */
+#define	DMxCTR_BG		0x0000001f	/* transfer request source */
+#define	DMxCTR_BG_SOFT		0x00000000	/* - software source */
+#define	DMxCTR_BG_SC0TX		0x00000002	/* - serial port 0 transmission */
+#define	DMxCTR_BG_SC0RX		0x00000003	/* - serial port 0 reception */
+#define	DMxCTR_BG_SC1TX		0x00000004	/* - serial port 1 transmission */
+#define	DMxCTR_BG_SC1RX		0x00000005	/* - serial port 1 reception */
+#define	DMxCTR_BG_SC2TX		0x00000006	/* - serial port 2 transmission */
+#define	DMxCTR_BG_SC2RX		0x00000007	/* - serial port 2 reception */
+#define	DMxCTR_BG_TM0UFLOW	0x00000008	/* - timer 0 underflow */
+#define	DMxCTR_BG_TM1UFLOW	0x00000009	/* - timer 1 underflow */
+#define	DMxCTR_BG_TM2UFLOW	0x0000000a	/* - timer 2 underflow */
+#define	DMxCTR_BG_TM3UFLOW	0x0000000b	/* - timer 3 underflow */
+#define	DMxCTR_BG_TM6ACMPCAP	0x0000000c	/* - timer 6A compare/capture */
+#define	DMxCTR_BG_AFE		0x0000000d	/* - analogue front-end interrupt source */
+#define	DMxCTR_BG_ADC		0x0000000e	/* - A/D conversion end interrupt source */
+#define	DMxCTR_BG_IRDA		0x0000000f	/* - IrDA interrupt source */
+#define	DMxCTR_BG_RTC		0x00000010	/* - RTC interrupt source */
+#define	DMxCTR_BG_XIRQ0		0x00000011	/* - XIRQ0 pin interrupt source */
+#define	DMxCTR_BG_XIRQ1		0x00000012	/* - XIRQ1 pin interrupt source */
+#define	DMxCTR_BG_XDMR0		0x00000013	/* - external request 0 source (XDMR0 pin) */
+#define	DMxCTR_BG_XDMR1		0x00000014	/* - external request 1 source (XDMR1 pin) */
+#define	DMxCTR_SAM		0x000000e0	/* DMA transfer src addr mode */
+#define	DMxCTR_SAM_INCR		0x00000000	/* - increment */
+#define	DMxCTR_SAM_DECR		0x00000020	/* - decrement */
+#define	DMxCTR_SAM_FIXED	0x00000040	/* - fixed */
+#define	DMxCTR_DAM		0x00000000	/* DMA transfer dest addr mode */
+#define	DMxCTR_DAM_INCR		0x00000000	/* - increment */
+#define	DMxCTR_DAM_DECR		0x00000100	/* - decrement */
+#define	DMxCTR_DAM_FIXED	0x00000200	/* - fixed */
+#define	DMxCTR_TM		0x00001800	/* DMA transfer mode */
+#define	DMxCTR_TM_BATCH		0x00000000	/* - batch transfer */
+#define	DMxCTR_TM_INTERM	0x00001000	/* - intermittent transfer */
+#define	DMxCTR_UT		0x00006000	/* DMA transfer unit */
+#define	DMxCTR_UT_1		0x00000000	/* - 1 byte */
+#define	DMxCTR_UT_2		0x00002000	/* - 2 byte */
+#define	DMxCTR_UT_4		0x00004000	/* - 4 byte */
+#define	DMxCTR_UT_16		0x00006000	/* - 16 byte */
+#define	DMxCTR_TEN		0x00010000	/* DMA channel transfer enable */
+#define	DMxCTR_RQM		0x00060000	/* external request input source mode */
+#define	DMxCTR_RQM_FALLEDGE	0x00000000	/* - falling edge */
+#define	DMxCTR_RQM_RISEEDGE	0x00020000	/* - rising edge */
+#define	DMxCTR_RQM_LOLEVEL	0x00040000	/* - low level */
+#define	DMxCTR_RQM_HILEVEL	0x00060000	/* - high level */
+#define	DMxCTR_RQF		0x01000000	/* DMA transfer request flag */
+#define	DMxCTR_XEND		0x80000000	/* DMA transfer end flag */
+
+#define	DMxSRC(N)		__SYSREG(0xd2000004 + ((N) * 0x100), u32)	/* control reg */
+
+#define	DMxDST(N)		__SYSREG(0xd2000008 + ((N) * 0x100), u32)	/* src addr reg */
+
+#define	DMxSIZ(N)		__SYSREG(0xd200000c + ((N) * 0x100), u32)	/* dest addr reg */
+#define DMxSIZ_CT		0x000fffff	/* number of bytes to transfer */
+
+#define	DMxCYC(N)		__SYSREG(0xd2000010 + ((N) * 0x100), u32)	/* intermittent
+										 * size reg */
+#define DMxCYC_CYC		0x000000ff	/* number of interrmittent transfers -1 */
+
+#define DM0IRQ			16		/* DMA channel 0 complete IRQ */
+#define DM1IRQ			17		/* DMA channel 1 complete IRQ */
+#define DM2IRQ			18		/* DMA channel 2 complete IRQ */
+#define DM3IRQ			19		/* DMA channel 3 complete IRQ */
+
+#define	DM0ICR			GxICR(DM0IRQ)	/* DMA channel 0 complete intr ctrl reg */
+#define	DM1ICR			GxICR(DM0IR1)	/* DMA channel 1 complete intr ctrl reg */
+#define	DM2ICR			GxICR(DM0IR2)	/* DMA channel 2 complete intr ctrl reg */
+#define	DM3ICR			GxICR(DM0IR3)	/* DMA channel 3 complete intr ctrl reg */
+
+#ifndef __ASSEMBLY__
+
+struct mn10300_dmactl_regs {
+	u32		ctr;
+	const void	*src;
+	void		*dst;
+	u32		siz;
+	u32		cyc;
+} __attribute__((aligned(0x100)));
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_DMACTL_REGS_H */
diff --git a/include/asm-mn10300/elf.h b/include/asm-mn10300/elf.h
new file mode 100644
index 000000000000..256a70466ca4
--- /dev/null
+++ b/include/asm-mn10300/elf.h
@@ -0,0 +1,147 @@
+/* MN10300 ELF constant and register definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_ELF_H
+#define _ASM_ELF_H
+
+#include <linux/utsname.h>
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+/*
+ * AM33 relocations
+ */
+#define R_MN10300_NONE		0	/* No reloc.  */
+#define R_MN10300_32		1	/* Direct 32 bit.  */
+#define R_MN10300_16		2	/* Direct 16 bit.  */
+#define R_MN10300_8		3	/* Direct 8 bit.  */
+#define R_MN10300_PCREL32	4	/* PC-relative 32-bit.  */
+#define R_MN10300_PCREL16	5	/* PC-relative 16-bit signed.  */
+#define R_MN10300_PCREL8	6	/* PC-relative 8-bit signed.  */
+#define R_MN10300_24		9	/* Direct 24 bit.  */
+#define R_MN10300_RELATIVE	23	/* Adjust by program base.  */
+
+/*
+ * ELF register definitions..
+ */
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+#define ELF_NFPREG 32
+typedef float elf_fpreg_t;
+
+typedef struct {
+	elf_fpreg_t	fpregs[ELF_NFPREG];
+	u_int32_t	fpcr;
+} elf_fpregset_t;
+
+extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture
+ */
+#define elf_check_arch(x) \
+	(((x)->e_machine == EM_CYGNUS_MN10300) ||	\
+	 ((x)->e_machine == EM_MN10300))
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA	ELFDATA2LSB
+#define ELF_ARCH	EM_MN10300
+
+/*
+ * ELF process initialiser
+ */
+#define ELF_PLAT_INIT(_r, load_addr)					\
+do {									\
+	struct pt_regs *_ur = current->thread.uregs;			\
+	_ur->a3   = 0;	_ur->a2   = 0;	_ur->d3   = 0;	_ur->d2   = 0;	\
+	_ur->mcvf = 0;	_ur->mcrl = 0;	_ur->mcrh = 0;	_ur->mdrq = 0;	\
+	_ur->e1   = 0;	_ur->e0   = 0;	_ur->e7   = 0;	_ur->e6   = 0;	\
+	_ur->e5   = 0;	_ur->e4   = 0;	_ur->e3   = 0;	_ur->e2   = 0;	\
+	_ur->lar  = 0;	_ur->lir  = 0;	_ur->mdr  = 0;			\
+	_ur->a1   = 0;	_ur->a0   = 0;	_ur->d1   = 0;	_ur->d0   = 0;	\
+} while (0)
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE	4096
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader.  We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ * - must clear the VMALLOC area
+ */
+#define ELF_ET_DYN_BASE         0x04000000
+
+/*
+ * regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+ * now struct user_regs, they are different)
+ * - ELF_CORE_COPY_REGS has been guessed, and may be wrong
+ */
+#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
+do {						\
+	pr_reg[0]	= regs->a3;		\
+	pr_reg[1]	= regs->a2;		\
+	pr_reg[2]	= regs->d3;		\
+	pr_reg[3]	= regs->d2;		\
+	pr_reg[4]	= regs->mcvf;		\
+	pr_reg[5]	= regs->mcrl;		\
+	pr_reg[6]	= regs->mcrh;		\
+	pr_reg[7]	= regs->mdrq;		\
+	pr_reg[8]	= regs->e1;		\
+	pr_reg[9]	= regs->e0;		\
+	pr_reg[10]	= regs->e7;		\
+	pr_reg[11]	= regs->e6;		\
+	pr_reg[12]	= regs->e5;		\
+	pr_reg[13]	= regs->e4;		\
+	pr_reg[14]	= regs->e3;		\
+	pr_reg[15]	= regs->e2;		\
+	pr_reg[16]	= regs->sp;		\
+	pr_reg[17]	= regs->lar;		\
+	pr_reg[18]	= regs->lir;		\
+	pr_reg[19]	= regs->mdr;		\
+	pr_reg[20]	= regs->a1;		\
+	pr_reg[21]	= regs->a0;		\
+	pr_reg[22]	= regs->d1;		\
+	pr_reg[23]	= regs->d0;		\
+	pr_reg[24]	= regs->orig_d0;	\
+	pr_reg[25]	= regs->epsw;		\
+	pr_reg[26]	= regs->pc;		\
+} while (0);
+
+/*
+ * This yields a mask that user programs can use to figure out what
+ * instruction set this CPU supports.  This could be done in user space,
+ * but it's not easy, and we've already done it here.
+ */
+#define ELF_HWCAP	(0)
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ *
+ * For the moment, we have only optimizations for the Intel generations,
+ * but that could change...
+ */
+#define ELF_PLATFORM  (NULL)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX)
+#endif
+
+#endif /* _ASM_ELF_H */
diff --git a/include/asm-mn10300/emergency-restart.h b/include/asm-mn10300/emergency-restart.h
new file mode 100644
index 000000000000..3711bd9d50bd
--- /dev/null
+++ b/include/asm-mn10300/emergency-restart.h
@@ -0,0 +1 @@
+#include <asm-generic/emergency-restart.h>
diff --git a/include/asm-mn10300/errno.h b/include/asm-mn10300/errno.h
new file mode 100644
index 000000000000..4c82b503d92f
--- /dev/null
+++ b/include/asm-mn10300/errno.h
@@ -0,0 +1 @@
+#include <asm-generic/errno.h>
diff --git a/include/asm-mn10300/exceptions.h b/include/asm-mn10300/exceptions.h
new file mode 100644
index 000000000000..fa16466ef3f9
--- /dev/null
+++ b/include/asm-mn10300/exceptions.h
@@ -0,0 +1,121 @@
+/* MN10300 Microcontroller core exceptions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_EXCEPTIONS_H
+#define _ASM_EXCEPTIONS_H
+
+#include <linux/linkage.h>
+
+/*
+ * define the breakpoint instruction opcode to use
+ * - note that the JTAG unit steals 0xFF, so we want to avoid that if we can
+ *   (can use 0xF7)
+ */
+#define GDBSTUB_BKPT		0xFF
+
+#ifndef __ASSEMBLY__
+
+/*
+ * enumeration of exception codes (as extracted from TBR MSW)
+ */
+enum exception_code {
+	EXCEP_RESET		= 0x000000,	/* reset */
+
+	/* MMU exceptions */
+	EXCEP_ITLBMISS		= 0x000100,	/* instruction TLB miss */
+	EXCEP_DTLBMISS		= 0x000108,	/* data TLB miss */
+	EXCEP_IAERROR		= 0x000110,	/* instruction address */
+	EXCEP_DAERROR		= 0x000118,	/* data address */
+
+	/* system exceptions */
+	EXCEP_TRAP		= 0x000128,	/* program interrupt (PI instruction) */
+	EXCEP_ISTEP		= 0x000130,	/* single step */
+	EXCEP_IBREAK		= 0x000150,	/* instruction breakpoint */
+	EXCEP_OBREAK		= 0x000158,	/* operand breakpoint */
+	EXCEP_PRIVINS		= 0x000160,	/* privileged instruction execution */
+	EXCEP_UNIMPINS		= 0x000168,	/* unimplemented instruction execution */
+	EXCEP_UNIMPEXINS	= 0x000170,	/* unimplemented extended instruction execution */
+	EXCEP_MEMERR		= 0x000178,	/* illegal memory access */
+	EXCEP_MISALIGN		= 0x000180,	/* misalignment */
+	EXCEP_BUSERROR		= 0x000188,	/* bus error */
+	EXCEP_ILLINSACC		= 0x000190,	/* illegal instruction access */
+	EXCEP_ILLDATACC		= 0x000198,	/* illegal data access */
+	EXCEP_IOINSACC		= 0x0001a0,	/* I/O space instruction access */
+	EXCEP_PRIVINSACC	= 0x0001a8,	/* privileged space instruction access */
+	EXCEP_PRIVDATACC	= 0x0001b0,	/* privileged space data access */
+	EXCEP_DATINSACC		= 0x0001b8,	/* data space instruction access */
+	EXCEP_DOUBLE_FAULT	= 0x000200,	/* double fault */
+
+	/* FPU exceptions */
+	EXCEP_FPU_DISABLED	= 0x0001c0,	/* FPU disabled */
+	EXCEP_FPU_UNIMPINS	= 0x0001c8,	/* FPU unimplemented operation */
+	EXCEP_FPU_OPERATION	= 0x0001d0,	/* FPU operation */
+
+	/* interrupts */
+	EXCEP_WDT		= 0x000240,	/* watchdog timer overflow */
+	EXCEP_NMI		= 0x000248,	/* non-maskable interrupt */
+	EXCEP_IRQ_LEVEL0	= 0x000280,	/* level 0 maskable interrupt */
+	EXCEP_IRQ_LEVEL1	= 0x000288,	/* level 1 maskable interrupt */
+	EXCEP_IRQ_LEVEL2	= 0x000290,	/* level 2 maskable interrupt */
+	EXCEP_IRQ_LEVEL3	= 0x000298,	/* level 3 maskable interrupt */
+	EXCEP_IRQ_LEVEL4	= 0x0002a0,	/* level 4 maskable interrupt */
+	EXCEP_IRQ_LEVEL5	= 0x0002a8,	/* level 5 maskable interrupt */
+	EXCEP_IRQ_LEVEL6	= 0x0002b0,	/* level 6 maskable interrupt */
+
+	/* system calls */
+	EXCEP_SYSCALL0		= 0x000300,	/* system call 0 */
+	EXCEP_SYSCALL1		= 0x000308,	/* system call 1 */
+	EXCEP_SYSCALL2		= 0x000310,	/* system call 2 */
+	EXCEP_SYSCALL3		= 0x000318,	/* system call 3 */
+	EXCEP_SYSCALL4		= 0x000320,	/* system call 4 */
+	EXCEP_SYSCALL5		= 0x000328,	/* system call 5 */
+	EXCEP_SYSCALL6		= 0x000330,	/* system call 6 */
+	EXCEP_SYSCALL7		= 0x000338,	/* system call 7 */
+	EXCEP_SYSCALL8		= 0x000340,	/* system call 8 */
+	EXCEP_SYSCALL9		= 0x000348,	/* system call 9 */
+	EXCEP_SYSCALL10		= 0x000350,	/* system call 10 */
+	EXCEP_SYSCALL11		= 0x000358,	/* system call 11 */
+	EXCEP_SYSCALL12		= 0x000360,	/* system call 12 */
+	EXCEP_SYSCALL13		= 0x000368,	/* system call 13 */
+	EXCEP_SYSCALL14		= 0x000370,	/* system call 14 */
+	EXCEP_SYSCALL15		= 0x000378,	/* system call 15 */
+};
+
+extern void __set_intr_stub(enum exception_code code, void *handler);
+extern void set_intr_stub(enum exception_code code, void *handler);
+extern void set_jtag_stub(enum exception_code code, void *handler);
+
+struct pt_regs;
+
+extern asmlinkage void __common_exception(void);
+extern asmlinkage void itlb_miss(void);
+extern asmlinkage void dtlb_miss(void);
+extern asmlinkage void itlb_aerror(void);
+extern asmlinkage void dtlb_aerror(void);
+extern asmlinkage void raw_bus_error(void);
+extern asmlinkage void double_fault(void);
+extern asmlinkage int  system_call(struct pt_regs *);
+extern asmlinkage void fpu_exception(struct pt_regs *, enum exception_code);
+extern asmlinkage void nmi(struct pt_regs *, enum exception_code);
+extern asmlinkage void uninitialised_exception(struct pt_regs *,
+					       enum exception_code);
+extern asmlinkage void irq_handler(void);
+extern asmlinkage void profile_handler(void);
+extern asmlinkage void nmi_handler(void);
+extern asmlinkage void misalignment(struct pt_regs *, enum exception_code);
+
+extern void die(const char *, struct pt_regs *, enum exception_code)
+	ATTRIB_NORET;
+
+extern int die_if_no_fixup(const char *, struct pt_regs *, enum exception_code);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_EXCEPTIONS_H */
diff --git a/include/asm-mn10300/fb.h b/include/asm-mn10300/fb.h
new file mode 100644
index 000000000000..697b24a91e1a
--- /dev/null
+++ b/include/asm-mn10300/fb.h
@@ -0,0 +1,23 @@
+/* MN10300 Frame buffer stuff
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_FB_H
+#define _ASM_FB_H
+
+#include <linux/fb.h>
+
+#define fb_pgprotect(...) do {} while (0)
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* _ASM_FB_H */
diff --git a/include/asm-mn10300/fcntl.h b/include/asm-mn10300/fcntl.h
new file mode 100644
index 000000000000..46ab12db5739
--- /dev/null
+++ b/include/asm-mn10300/fcntl.h
@@ -0,0 +1 @@
+#include <asm-generic/fcntl.h>
diff --git a/include/asm-mn10300/fpu.h b/include/asm-mn10300/fpu.h
new file mode 100644
index 000000000000..64a2b83a7a6a
--- /dev/null
+++ b/include/asm-mn10300/fpu.h
@@ -0,0 +1,85 @@
+/* MN10300 FPU definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Derived from include/asm-i386/i387.h: Copyright (C) 1994 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_FPU_H
+#define _ASM_FPU_H
+
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+
+#ifdef __KERNEL__
+
+/* the task that owns the FPU state */
+extern struct task_struct *fpu_state_owner;
+
+#define set_using_fpu(tsk)				\
+do {							\
+	(tsk)->thread.fpu_flags |= THREAD_USING_FPU;	\
+} while (0)
+
+#define clear_using_fpu(tsk)				\
+do {							\
+	(tsk)->thread.fpu_flags &= ~THREAD_USING_FPU;	\
+} while (0)
+
+#define is_using_fpu(tsk) ((tsk)->thread.fpu_flags & THREAD_USING_FPU)
+
+#define unlazy_fpu(tsk)					\
+do {							\
+	preempt_disable();				\
+	if (fpu_state_owner == (tsk))			\
+		fpu_save(&tsk->thread.fpu_state);	\
+	preempt_enable();				\
+} while (0)
+
+#define exit_fpu()				\
+do {						\
+	struct task_struct *__tsk = current;	\
+	preempt_disable();			\
+	if (fpu_state_owner == __tsk)		\
+		fpu_state_owner = NULL;		\
+	preempt_enable();			\
+} while (0)
+
+#define flush_fpu()					\
+do {							\
+	struct task_struct *__tsk = current;		\
+	preempt_disable();				\
+	if (fpu_state_owner == __tsk) {			\
+		fpu_state_owner = NULL;			\
+		__tsk->thread.uregs->epsw &= ~EPSW_FE;	\
+	}						\
+	preempt_enable();				\
+	clear_using_fpu(__tsk);				\
+} while (0)
+
+extern asmlinkage void fpu_init_state(void);
+extern asmlinkage void fpu_kill_state(struct task_struct *);
+extern asmlinkage void fpu_disabled(struct pt_regs *, enum exception_code);
+extern asmlinkage void fpu_exception(struct pt_regs *, enum exception_code);
+
+#ifdef CONFIG_FPU
+extern asmlinkage void fpu_save(struct fpu_state_struct *);
+extern asmlinkage void fpu_restore(struct fpu_state_struct *);
+#else
+#define fpu_save(a)
+#define fpu_restore(a)
+#endif /* CONFIG_FPU  */
+
+/*
+ * signal frame handlers
+ */
+extern int fpu_setup_sigcontext(struct fpucontext *buf);
+extern int fpu_restore_sigcontext(struct fpucontext *buf);
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_FPU_H */
diff --git a/include/asm-mn10300/frame.inc b/include/asm-mn10300/frame.inc
new file mode 100644
index 000000000000..5b1949bdf039
--- /dev/null
+++ b/include/asm-mn10300/frame.inc
@@ -0,0 +1,91 @@
+/* MN10300 Microcontroller core system register definitions -*- asm -*-
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_FRAME_INC
+#define _ASM_FRAME_INC
+
+#ifndef __ASSEMBLY__
+#error not for use in C files
+#endif
+
+#ifndef __ASM_OFFSETS_H__
+#include <asm/asm-offsets.h>
+#endif
+
+#define pi break
+
+#define fp a3
+
+###############################################################################
+#
+# build a stack frame from the registers
+# - the caller has subtracted 4 from SP before coming here
+#
+###############################################################################
+.macro SAVE_ALL
+	add	-4,sp				# next exception frame ptr save area
+	movm	[other],(sp)
+	mov	usp,a1
+	mov	a1,(sp)				# USP in MOVM[other] dummy slot
+	movm	[d2,d3,a2,a3,exreg0,exreg1,exother],(sp)
+	mov	sp,fp				# FRAME pointer in A3
+	add	-12,sp				# allow for calls to be made
+	mov	(__frame),a1
+	mov	a1,(REG_NEXT,fp)
+	mov	fp,(__frame)
+
+	and	~EPSW_FE,epsw			# disable the FPU inside the kernel
+
+	# we may be holding current in E2
+#ifdef CONFIG_MN10300_CURRENT_IN_E2
+	mov	(__current),e2
+#endif
+.endm
+
+###############################################################################
+#
+# restore the registers from a stack frame
+#
+###############################################################################
+.macro RESTORE_ALL
+	# peel back the stack to the calling frame
+	# - this permits execve() to discard extra frames due to kernel syscalls
+	mov	(__frame),fp
+	mov	fp,sp
+	mov	(REG_NEXT,fp),d0                # userspace has regs->next == 0
+	mov	d0,(__frame)
+
+#ifndef CONFIG_MN10300_USING_JTAG
+	mov	(REG_EPSW,fp),d0
+	btst	EPSW_T,d0
+	beq	99f
+
+	or	EPSW_NMID,epsw
+	movhu	(DCR),d1
+	or	0x0001, d1
+	movhu	d1,(DCR)
+
+99:
+#endif
+	movm	(sp),[d2,d3,a2,a3,exreg0,exreg1,exother]
+
+	# must restore usp even if returning to kernel space,
+	# when CONFIG_PREEMPT is enabled.
+	mov	(sp),a1				# USP in MOVM[other] dummy slot
+	mov	a1,usp
+
+	movm	(sp),[other]
+	add	8,sp
+	rti
+
+.endm
+
+
+#endif /* _ASM_FRAME_INC */
diff --git a/include/asm-mn10300/futex.h b/include/asm-mn10300/futex.h
new file mode 100644
index 000000000000..0b745828f42b
--- /dev/null
+++ b/include/asm-mn10300/futex.h
@@ -0,0 +1 @@
+#include <asm-generic/futex.h>
diff --git a/include/asm-mn10300/gdb-stub.h b/include/asm-mn10300/gdb-stub.h
new file mode 100644
index 000000000000..e5a6368559af
--- /dev/null
+++ b/include/asm-mn10300/gdb-stub.h
@@ -0,0 +1,183 @@
+/* MN10300 Kernel GDB stub definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from asm-mips/gdb-stub.h (c) 1995 Andreas Busse
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_GDB_STUB_H
+#define _ASM_GDB_STUB_H
+
+#include <asm/exceptions.h>
+
+/*
+ * register ID numbers in GDB remote protocol
+ */
+
+#define GDB_REGID_PC		9
+#define GDB_REGID_FP		7
+#define GDB_REGID_SP		8
+
+/*
+ * virtual stack layout for the GDB exception handler
+ */
+#define NUMREGS			64
+
+#define GDB_FR_D0		(0 * 4)
+#define GDB_FR_D1		(1 * 4)
+#define GDB_FR_D2		(2 * 4)
+#define GDB_FR_D3		(3 * 4)
+#define GDB_FR_A0		(4 * 4)
+#define GDB_FR_A1		(5 * 4)
+#define GDB_FR_A2		(6 * 4)
+#define GDB_FR_A3		(7 * 4)
+
+#define GDB_FR_SP		(8 * 4)
+#define GDB_FR_PC		(9 * 4)
+#define GDB_FR_MDR		(10 * 4)
+#define GDB_FR_EPSW		(11 * 4)
+#define GDB_FR_LIR		(12 * 4)
+#define GDB_FR_LAR		(13 * 4)
+#define GDB_FR_MDRQ		(14 * 4)
+
+#define GDB_FR_E0		(15 * 4)
+#define GDB_FR_E1		(16 * 4)
+#define GDB_FR_E2		(17 * 4)
+#define GDB_FR_E3		(18 * 4)
+#define GDB_FR_E4		(19 * 4)
+#define GDB_FR_E5		(20 * 4)
+#define GDB_FR_E6		(21 * 4)
+#define GDB_FR_E7		(22 * 4)
+
+#define GDB_FR_SSP		(23 * 4)
+#define GDB_FR_MSP		(24 * 4)
+#define GDB_FR_USP		(25 * 4)
+#define GDB_FR_MCRH		(26 * 4)
+#define GDB_FR_MCRL		(27 * 4)
+#define GDB_FR_MCVF		(28 * 4)
+
+#define GDB_FR_FPCR		(29 * 4)
+#define GDB_FR_DUMMY0		(30 * 4)
+#define GDB_FR_DUMMY1		(31 * 4)
+
+#define GDB_FR_FS0		(32 * 4)
+
+#define GDB_FR_SIZE		(NUMREGS * 4)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This is the same as above, but for the high-level
+ * part of the GDB stub.
+ */
+
+struct gdb_regs {
+	/* saved main processor registers */
+	u32	d0, d1, d2, d3, a0, a1, a2, a3;
+	u32	sp, pc, mdr, epsw, lir, lar, mdrq;
+	u32	e0, e1, e2, e3, e4, e5, e6, e7;
+	u32	ssp, msp, usp, mcrh, mcrl, mcvf;
+
+	/* saved floating point registers */
+	u32	fpcr, _dummy0, _dummy1;
+	u32	fs0,  fs1,  fs2,  fs3,  fs4,  fs5,  fs6,  fs7;
+	u32	fs8,  fs9,  fs10, fs11, fs12, fs13, fs14, fs15;
+	u32	fs16, fs17, fs18, fs19, fs20, fs21, fs22, fs23;
+	u32	fs24, fs25, fs26, fs27, fs28, fs29, fs30, fs31;
+};
+
+/*
+ * Prototypes
+ */
+extern void show_registers_only(struct pt_regs *regs);
+
+extern asmlinkage void gdbstub_init(void);
+extern asmlinkage void gdbstub_exit(int status);
+extern asmlinkage void gdbstub_io_init(void);
+extern asmlinkage void gdbstub_io_set_baud(unsigned baud);
+extern asmlinkage int  gdbstub_io_rx_char(unsigned char *_ch, int nonblock);
+extern asmlinkage void gdbstub_io_tx_char(unsigned char ch);
+extern asmlinkage void gdbstub_io_tx_flush(void);
+
+extern asmlinkage void gdbstub_io_rx_handler(void);
+extern asmlinkage void gdbstub_rx_irq(struct pt_regs *, enum exception_code);
+extern asmlinkage int  gdbstub_intercept(struct pt_regs *, enum exception_code);
+extern asmlinkage void gdbstub_exception(struct pt_regs *, enum exception_code);
+extern asmlinkage void __gdbstub_bug_trap(void);
+extern asmlinkage void __gdbstub_pause(void);
+extern asmlinkage void start_kernel(void);
+
+#ifndef CONFIG_MN10300_CACHE_DISABLED
+extern asmlinkage void gdbstub_purge_cache(void);
+#else
+#define gdbstub_purge_cache()	do {} while (0)
+#endif
+
+/* Used to prevent crashes in memory access */
+extern asmlinkage int  gdbstub_read_byte(const u8 *, u8 *);
+extern asmlinkage int  gdbstub_read_word(const u8 *, u8 *);
+extern asmlinkage int  gdbstub_read_dword(const u8 *, u8 *);
+extern asmlinkage int  gdbstub_write_byte(u32, u8 *);
+extern asmlinkage int  gdbstub_write_word(u32, u8 *);
+extern asmlinkage int  gdbstub_write_dword(u32, u8 *);
+
+extern asmlinkage void gdbstub_read_byte_guard(void);
+extern asmlinkage void gdbstub_read_byte_cont(void);
+extern asmlinkage void gdbstub_read_word_guard(void);
+extern asmlinkage void gdbstub_read_word_cont(void);
+extern asmlinkage void gdbstub_read_dword_guard(void);
+extern asmlinkage void gdbstub_read_dword_cont(void);
+extern asmlinkage void gdbstub_write_byte_guard(void);
+extern asmlinkage void gdbstub_write_byte_cont(void);
+extern asmlinkage void gdbstub_write_word_guard(void);
+extern asmlinkage void gdbstub_write_word_cont(void);
+extern asmlinkage void gdbstub_write_dword_guard(void);
+extern asmlinkage void gdbstub_write_dword_cont(void);
+
+extern u8	gdbstub_rx_buffer[PAGE_SIZE];
+extern u32	gdbstub_rx_inp;
+extern u32	gdbstub_rx_outp;
+extern u8	gdbstub_rx_overflow;
+extern u8	gdbstub_busy;
+extern u8	gdbstub_rx_unget;
+
+#ifdef CONFIG_GDBSTUB_DEBUGGING
+extern void gdbstub_printk(const char *fmt, ...)
+	__attribute__((format(printf, 1, 2)));
+#else
+static inline __attribute__((format(printf, 1, 2)))
+void gdbstub_printk(const char *fmt, ...)
+{
+}
+#endif
+
+#ifdef CONFIG_GDBSTUB_DEBUG_ENTRY
+#define gdbstub_entry(FMT, ...) gdbstub_printk(FMT, ##__VA_ARGS__)
+#else
+#define gdbstub_entry(FMT, ...) ({ 0; })
+#endif
+
+#ifdef CONFIG_GDBSTUB_DEBUG_PROTOCOL
+#define gdbstub_proto(FMT, ...) gdbstub_printk(FMT, ##__VA_ARGS__)
+#else
+#define gdbstub_proto(FMT, ...) ({ 0; })
+#endif
+
+#ifdef CONFIG_GDBSTUB_DEBUG_IO
+#define gdbstub_io(FMT, ...) gdbstub_printk(FMT, ##__VA_ARGS__)
+#else
+#define gdbstub_io(FMT, ...) ({ 0; })
+#endif
+
+#ifdef CONFIG_GDBSTUB_DEBUG_BREAKPOINT
+#define gdbstub_bkpt(FMT, ...) gdbstub_printk(FMT, ##__VA_ARGS__)
+#else
+#define gdbstub_bkpt(FMT, ...) ({ 0; })
+#endif
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_GDB_STUB_H */
diff --git a/include/asm-mn10300/hardirq.h b/include/asm-mn10300/hardirq.h
new file mode 100644
index 000000000000..54d950117674
--- /dev/null
+++ b/include/asm-mn10300/hardirq.h
@@ -0,0 +1,48 @@
+/* MN10300 Hardware IRQ statistics and management
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_HARDIRQ_H
+#define _ASM_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+#include <asm/exceptions.h>
+
+/* assembly code in softirq.h is sensitive to the offsets of these fields */
+typedef struct {
+	unsigned int	__softirq_pending;
+	unsigned long	idle_timestamp;
+	unsigned int	__nmi_count;	/* arch dependent */
+	unsigned int	__irq_count;	/* arch dependent */
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+extern void ack_bad_irq(int irq);
+
+/*
+ * manipulate stubs in the MN10300 CPU Trap/Interrupt Vector table
+ * - these should jump to __common_exception in entry.S unless there's a good
+ *   reason to do otherwise (see trap_preinit() in traps.c)
+ */
+typedef void (*intr_stub_fnx)(struct pt_regs *regs,
+			      enum exception_code intcode);
+
+/*
+ * manipulate pointers in the Exception table (see entry.S)
+ * - these are indexed by decoding the lower 24 bits of the TBR register
+ * - note that the MN103E010 doesn't always trap through the correct vector,
+ *   but does always set the TBR correctly
+ */
+extern asmlinkage void set_excp_vector(enum exception_code code,
+				       intr_stub_fnx handler);
+
+#endif /* _ASM_HARDIRQ_H */
diff --git a/include/asm-mn10300/highmem.h b/include/asm-mn10300/highmem.h
new file mode 100644
index 000000000000..383c0c42982e
--- /dev/null
+++ b/include/asm-mn10300/highmem.h
@@ -0,0 +1,114 @@
+/* MN10300 Virtual kernel memory mappings for high memory
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from include/asm-i386/highmem.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#ifdef __KERNEL__
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/kmap_types.h>
+#include <asm/pgtable.h>
+
+/* undef for production */
+#undef HIGHMEM_DEBUG
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+extern pte_t *pkmap_page_table;
+
+extern void __init kmap_init(void);
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+#define PKMAP_BASE	0xfe000000UL
+#define LAST_PKMAP	1024
+#define LAST_PKMAP_MASK (LAST_PKMAP - 1)
+#define PKMAP_NR(virt)  ((virt - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+extern unsigned long __fastcall kmap_high(struct page *page);
+extern void __fastcall kunmap_high(struct page *page);
+
+static inline unsigned long kmap(struct page *page)
+{
+	if (in_interrupt())
+		BUG();
+	if (page < highmem_start_page)
+		return page_address(page);
+	return kmap_high(page);
+}
+
+static inline void kunmap(struct page *page)
+{
+	if (in_interrupt())
+		BUG();
+	if (page < highmem_start_page)
+		return;
+	kunmap_high(page);
+}
+
+/*
+ * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
+ * gives a more generic (and caching) interface. But kmap_atomic can
+ * be used in IRQ contexts, so in some (very limited) cases we need
+ * it.
+ */
+static inline unsigned long kmap_atomic(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	if (page < highmem_start_page)
+		return page_address(page);
+
+	idx = type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#if HIGHMEM_DEBUG
+	if (!pte_none(*(kmap_pte - idx)))
+		BUG();
+#endif
+	set_pte(kmap_pte - idx, mk_pte(page, kmap_prot));
+	__flush_tlb_one(vaddr);
+
+	return vaddr;
+}
+
+static inline void kunmap_atomic(unsigned long vaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+	enum fixed_addresses idx = type + KM_TYPE_NR * smp_processor_id();
+
+	if (vaddr < FIXADDR_START) /* FIXME */
+		return;
+
+	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx))
+		BUG();
+
+	/*
+	 * force other mappings to Oops if they'll try to access
+	 * this pte without first remap it
+	 */
+	pte_clear(kmap_pte - idx);
+	__flush_tlb_one(vaddr);
+#endif
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_HIGHMEM_H */
diff --git a/include/asm-mn10300/hw_irq.h b/include/asm-mn10300/hw_irq.h
new file mode 100644
index 000000000000..70619901098e
--- /dev/null
+++ b/include/asm-mn10300/hw_irq.h
@@ -0,0 +1,14 @@
+/* MN10300 Hardware interrupt definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+#endif /* _ASM_HW_IRQ_H */
diff --git a/include/asm-mn10300/ide.h b/include/asm-mn10300/ide.h
new file mode 100644
index 000000000000..dc235121ec42
--- /dev/null
+++ b/include/asm-mn10300/ide.h
@@ -0,0 +1,43 @@
+/* MN10300 Arch-specific IDE code
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from include/asm-i386/ide.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_IDE_H
+#define _ASM_IDE_H
+
+#ifdef __KERNEL__
+
+#include <asm/intctl-regs.h>
+
+#undef SUPPORT_SLOW_DATA_PORTS
+#define SUPPORT_SLOW_DATA_PORTS 0
+
+#undef SUPPORT_VLB_SYNC
+#define SUPPORT_VLB_SYNC 0
+
+#ifndef MAX_HWIFS
+#define MAX_HWIFS 8
+#endif
+
+/*
+ * some bits needed for parts of the IDE subsystem to compile
+ */
+#define __ide_mm_insw(port, addr, n) \
+	insw((unsigned long) (port), (addr), (n))
+#define __ide_mm_insl(port, addr, n) \
+	insl((unsigned long) (port), (addr), (n))
+#define __ide_mm_outsw(port, addr, n) \
+	outsw((unsigned long) (port), (addr), (n))
+#define __ide_mm_outsl(port, addr, n) \
+	outsl((unsigned long) (port), (addr), (n))
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_IDE_H */
diff --git a/include/asm-mn10300/intctl-regs.h b/include/asm-mn10300/intctl-regs.h
new file mode 100644
index 000000000000..ba544c796c5a
--- /dev/null
+++ b/include/asm-mn10300/intctl-regs.h
@@ -0,0 +1,73 @@
+/* MN10300 On-board interrupt controller registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_INTCTL_REGS_H
+#define _ASM_INTCTL_REGS_H
+
+#include <asm/cpu-regs.h>
+
+#ifdef __KERNEL__
+
+/* interrupt controller registers */
+#define GxICR(X)		__SYSREG(0xd4000000 + (X) * 4, u16)	/* group irq ctrl regs */
+
+#define IAGR			__SYSREG(0xd4000100, u16)	/* intr acceptance group reg */
+#define IAGR_GN			0x00fc		/* group number register
+						 * (documentation _has_ to be wrong)
+						 */
+
+#define EXTMD			__SYSREG(0xd4000200, u16)	/* external pin intr spec reg */
+#define GET_XIRQ_TRIGGER(X) ((EXTMD >> ((X) * 2)) & 3)
+
+#define SET_XIRQ_TRIGGER(X,Y)			\
+do {						\
+	u16 x = EXTMD;				\
+	x &= ~(3 << ((X) * 2));			\
+	x |= ((Y) & 3) << ((X) * 2);		\
+	EXTMD = x;				\
+} while (0)
+
+#define XIRQ_TRIGGER_LOWLEVEL	0
+#define XIRQ_TRIGGER_HILEVEL	1
+#define XIRQ_TRIGGER_NEGEDGE	2
+#define XIRQ_TRIGGER_POSEDGE	3
+
+/* non-maskable interrupt control */
+#define NMIIRQ			0
+#define NMICR			GxICR(NMIIRQ)	/* NMI control register */
+#define NMICR_NMIF		0x0001		/* NMI pin interrupt flag */
+#define NMICR_WDIF		0x0002		/* watchdog timer overflow flag */
+#define NMICR_ABUSERR		0x0008		/* async bus error flag */
+
+/* maskable interrupt control */
+#define GxICR_DETECT		0x0001		/* interrupt detect flag */
+#define GxICR_REQUEST		0x0010		/* interrupt request flag */
+#define GxICR_ENABLE		0x0100		/* interrupt enable flag */
+#define GxICR_LEVEL		0x7000		/* interrupt priority level */
+#define GxICR_LEVEL_0		0x0000		/* - level 0 */
+#define GxICR_LEVEL_1		0x1000		/* - level 1 */
+#define GxICR_LEVEL_2		0x2000		/* - level 2 */
+#define GxICR_LEVEL_3		0x3000		/* - level 3 */
+#define GxICR_LEVEL_4		0x4000		/* - level 4 */
+#define GxICR_LEVEL_5		0x5000		/* - level 5 */
+#define GxICR_LEVEL_6		0x6000		/* - level 6 */
+#define GxICR_LEVEL_SHIFT	12
+
+#ifndef __ASSEMBLY__
+extern void set_intr_level(int irq, u16 level);
+extern void set_intr_postackable(int irq);
+#endif
+
+/* external interrupts */
+#define XIRQxICR(X)		GxICR((X))	/* external interrupt control regs */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_INTCTL_REGS_H */
diff --git a/include/asm-mn10300/io.h b/include/asm-mn10300/io.h
new file mode 100644
index 000000000000..b8b6dc878250
--- /dev/null
+++ b/include/asm-mn10300/io.h
@@ -0,0 +1,299 @@
+/* MN10300 I/O port emulation and memory-mapped I/O
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <asm/page.h> /* I/O is all done through memory accesses */
+#include <asm/cpu-regs.h>
+#include <asm/cacheflush.h>
+
+#define mmiowb() do {} while (0)
+
+/*****************************************************************************/
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+static inline u8 readb(const volatile void __iomem *addr)
+{
+	return *(const volatile u8 *) addr;
+}
+
+static inline u16 readw(const volatile void __iomem *addr)
+{
+	return *(const volatile u16 *) addr;
+}
+
+static inline u32 readl(const volatile void __iomem *addr)
+{
+	return *(const volatile u32 *) addr;
+}
+
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define readb_relaxed readb
+#define readw_relaxed readw
+#define readl_relaxed readl
+
+static inline void writeb(u8 b, volatile void __iomem *addr)
+{
+	*(volatile u8 *) addr = b;
+}
+
+static inline void writew(u16 b, volatile void __iomem *addr)
+{
+	*(volatile u16 *) addr = b;
+}
+
+static inline void writel(u32 b, volatile void __iomem *addr)
+{
+	*(volatile u32 *) addr = b;
+}
+
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+/*****************************************************************************/
+/*
+ * traditional input/output functions
+ */
+static inline u8 inb_local(unsigned long addr)
+{
+	return readb((volatile void __iomem *) addr);
+}
+
+static inline void outb_local(u8 b, unsigned long addr)
+{
+	return writeb(b, (volatile void __iomem *) addr);
+}
+
+static inline u8 inb(unsigned long addr)
+{
+	return readb((volatile void __iomem *) addr);
+}
+
+static inline u16 inw(unsigned long addr)
+{
+	return readw((volatile void __iomem *) addr);
+}
+
+static inline u32 inl(unsigned long addr)
+{
+	return readl((volatile void __iomem *) addr);
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+	return writeb(b, (volatile void __iomem *) addr);
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+	return writew(b, (volatile void __iomem *) addr);
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+	return writel(b, (volatile void __iomem *) addr);
+}
+
+#define inb_p(addr)	inb(addr)
+#define inw_p(addr)	inw(addr)
+#define inl_p(addr)	inl(addr)
+#define outb_p(x, addr)	outb((x), (addr))
+#define outw_p(x, addr)	outw((x), (addr))
+#define outl_p(x, addr)	outl((x), (addr))
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u8 *buf = buffer;
+		do {
+			u8 x = inb(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u16 *buf = buffer;
+		do {
+			u16 x = inw(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u32 *buf = buffer;
+		do {
+			u32 x = inl(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u8 *buf = buffer;
+		do {
+			outb(*buf++, addr);
+		} while (--count);
+	}
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u16 *buf = buffer;
+		do {
+			outw(*buf++, addr);
+		} while (--count);
+	}
+}
+
+extern void __outsl(unsigned long addr, const void *buffer, int count);
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+	if ((unsigned long) buffer & 0x3)
+		return __outsl(addr, buffer, count);
+
+	if (count) {
+		const u32 *buf = buffer;
+		do {
+			outl(*buf++, addr);
+		} while (--count);
+	}
+}
+
+#define ioread8(addr)		readb(addr)
+#define ioread16(addr)		readw(addr)
+#define ioread32(addr)		readl(addr)
+
+#define iowrite8(v, addr)	writeb((v), (addr))
+#define iowrite16(v, addr)	writew((v), (addr))
+#define iowrite32(v, addr)	writel((v), (addr))
+
+#define ioread8_rep(p, dst, count) \
+	insb((unsigned long) (p), (dst), (count))
+#define ioread16_rep(p, dst, count) \
+	insw((unsigned long) (p), (dst), (count))
+#define ioread32_rep(p, dst, count) \
+	insl((unsigned long) (p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) \
+	outsb((unsigned long) (p), (src), (count))
+#define iowrite16_rep(p, src, count) \
+	outsw((unsigned long) (p), (src), (count))
+#define iowrite32_rep(p, src, count) \
+	outsl((unsigned long) (p), (src), (count))
+
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+#ifdef __KERNEL__
+
+#include <linux/vmalloc.h>
+#define __io_virt(x) ((void *) (x))
+
+/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
+struct pci_dev;
+extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
+{
+}
+
+/*
+ * Change virtual addresses to physical addresses and vv.
+ * These are pretty trivial
+ */
+static inline unsigned long virt_to_phys(volatile void *address)
+{
+	return __pa(address);
+}
+
+static inline void *phys_to_virt(unsigned long address)
+{
+	return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+static inline void *__ioremap(unsigned long offset, unsigned long size,
+			      unsigned long flags)
+{
+	return (void *) offset;
+}
+
+static inline void *ioremap(unsigned long offset, unsigned long size)
+{
+	return (void *) offset;
+}
+
+/*
+ * This one maps high address device memory and turns off caching for that
+ * area.  it's useful if some control registers are in such an area and write
+ * combining or read caching is not desirable:
+ */
+static inline void *ioremap_nocache(unsigned long offset, unsigned long size)
+{
+	return (void *) (offset | 0x20000000);
+}
+
+static inline void iounmap(void *addr)
+{
+}
+
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return (void __iomem *) port;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
+#define xlate_dev_kmem_ptr(p)	((void *) (p))
+#define xlate_dev_mem_ptr(p)	((void *) (p))
+
+/*
+ * PCI bus iomem addresses must be in the region 0x80000000-0x9fffffff
+ */
+static inline unsigned long virt_to_bus(volatile void *address)
+{
+	return ((unsigned long) address) & ~0x20000000;
+}
+
+static inline void *bus_to_virt(unsigned long address)
+{
+	return (void *) address;
+}
+
+#define page_to_bus page_to_phys
+
+#define memset_io(a, b, c)	memset(__io_virt(a), (b), (c))
+#define memcpy_fromio(a, b, c)	memcpy((a), __io_virt(b), (c))
+#define memcpy_toio(a, b, c)	memcpy(__io_virt(a), (b), (c))
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IO_H */
diff --git a/include/asm-mn10300/ioctl.h b/include/asm-mn10300/ioctl.h
new file mode 100644
index 000000000000..b279fe06dfe5
--- /dev/null
+++ b/include/asm-mn10300/ioctl.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctl.h>
diff --git a/include/asm-mn10300/ioctls.h b/include/asm-mn10300/ioctls.h
new file mode 100644
index 000000000000..dcbfb452974f
--- /dev/null
+++ b/include/asm-mn10300/ioctls.h
@@ -0,0 +1,88 @@
+#ifndef _ASM_IOCTLS_H
+#define _ASM_IOCTLS_H
+
+#include <asm/ioctl.h>
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS		0x5401
+#define TCSETS		0x5402
+#define TCSETSW		0x5403
+#define TCSETSF		0x5404
+#define TCGETA		0x5405
+#define TCSETA		0x5406
+#define TCSETAW		0x5407
+#define TCSETAF		0x5408
+#define TCSBRK		0x5409
+#define TCXONC		0x540A
+#define TCFLSH		0x540B
+#define TIOCEXCL	0x540C
+#define TIOCNXCL	0x540D
+#define TIOCSCTTY	0x540E
+#define TIOCGPGRP	0x540F
+#define TIOCSPGRP	0x5410
+#define TIOCOUTQ	0x5411
+#define TIOCSTI		0x5412
+#define TIOCGWINSZ	0x5413
+#define TIOCSWINSZ	0x5414
+#define TIOCMGET	0x5415
+#define TIOCMBIS	0x5416
+#define TIOCMBIC	0x5417
+#define TIOCMSET	0x5418
+#define TIOCGSOFTCAR	0x5419
+#define TIOCSSOFTCAR	0x541A
+#define FIONREAD	0x541B
+#define TIOCINQ		FIONREAD
+#define TIOCLINUX	0x541C
+#define TIOCCONS	0x541D
+#define TIOCGSERIAL	0x541E
+#define TIOCSSERIAL	0x541F
+#define TIOCPKT		0x5420
+#define FIONBIO		0x5421
+#define TIOCNOTTY	0x5422
+#define TIOCSETD	0x5423
+#define TIOCGETD	0x5424
+#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
+/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */
+#define TIOCSBRK	0x5427  /* BSD compatibility */
+#define TIOCCBRK	0x5428  /* BSD compatibility */
+#define TIOCGSID	0x5429  /* Return the session ID of FD */
+#define TCGETS2		_IOR('T', 0x2A, struct termios2)
+#define TCSETS2		_IOW('T', 0x2B, struct termios2)
+#define TCSETSW2	_IOW('T', 0x2C, struct termios2)
+#define TCSETSF2	_IOW('T', 0x2D, struct termios2)
+#define TIOCGPTN	_IOR('T', 0x30, unsigned int) /* Get Pty Number
+						       * (of pty-mux device) */
+#define TIOCSPTLCK	_IOW('T', 0x31, int)  /* Lock/unlock Pty */
+
+#define FIONCLEX	0x5450
+#define FIOCLEX		0x5451
+#define FIOASYNC	0x5452
+#define TIOCSERCONFIG	0x5453
+#define TIOCSERGWILD	0x5454
+#define TIOCSERSWILD	0x5455
+#define TIOCGLCKTRMIOS	0x5456
+#define TIOCSLCKTRMIOS	0x5457
+#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
+#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
+#define TIOCGHAYESESP   0x545E  /* Get Hayes ESP configuration */
+#define TIOCSHAYESESP   0x545F  /* Set Hayes ESP configuration */
+#define FIOQSIZE	0x5460
+
+/* Used for packet mode */
+#define TIOCPKT_DATA		 0
+#define TIOCPKT_FLUSHREAD	 1
+#define TIOCPKT_FLUSHWRITE	 2
+#define TIOCPKT_STOP		 4
+#define TIOCPKT_START		 8
+#define TIOCPKT_NOSTOP		16
+#define TIOCPKT_DOSTOP		32
+
+#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
+
+#endif /* _ASM_IOCTLS_H */
diff --git a/include/asm-mn10300/ipc.h b/include/asm-mn10300/ipc.h
new file mode 100644
index 000000000000..a46e3d9c2a3f
--- /dev/null
+++ b/include/asm-mn10300/ipc.h
@@ -0,0 +1 @@
+#include <asm-generic/ipc.h>
diff --git a/include/asm-mn10300/ipcbuf.h b/include/asm-mn10300/ipcbuf.h
new file mode 100644
index 000000000000..efbbef8d1c69
--- /dev/null
+++ b/include/asm-mn10300/ipcbuf.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_IPCBUF_H_
+#define _ASM_IPCBUF_H
+
+/*
+ * The ipc64_perm structure for MN10300 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+	__kernel_key_t		key;
+	__kernel_uid32_t	uid;
+	__kernel_gid32_t	gid;
+	__kernel_uid32_t	cuid;
+	__kernel_gid32_t	cgid;
+	__kernel_mode_t		mode;
+	unsigned short		__pad1;
+	unsigned short		seq;
+	unsigned short		__pad2;
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+#endif /* _ASM_IPCBUF_H */
diff --git a/include/asm-mn10300/irq.h b/include/asm-mn10300/irq.h
new file mode 100644
index 000000000000..53b380116901
--- /dev/null
+++ b/include/asm-mn10300/irq.h
@@ -0,0 +1,32 @@
+/* MN10300 Hardware interrupt definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ * - Derived from include/asm-i386/irq.h:
+ *   - (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+#include <asm/intctl-regs.h>
+#include <asm/reset-regs.h>
+#include <asm/proc/irq.h>
+
+/* this number is used when no interrupt has been assigned */
+#define NO_IRQ		INT_MAX
+
+/* hardware irq numbers */
+#define NR_IRQS		GxICR_NUM_IRQS
+
+/* external hardware irq numbers */
+#define NR_XIRQS	GxICR_NUM_XIRQS
+
+#define irq_canonicalize(IRQ) (IRQ)
+
+#endif /* _ASM_IRQ_H */
diff --git a/include/asm-mn10300/irq_regs.h b/include/asm-mn10300/irq_regs.h
new file mode 100644
index 000000000000..a848cd232eb4
--- /dev/null
+++ b/include/asm-mn10300/irq_regs.h
@@ -0,0 +1,24 @@
+/* MN10300 IRQ registers pointer definition
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_IRQ_REGS_H
+#define _ASM_IRQ_REGS_H
+
+/*
+ * Per-cpu current frame pointer - the location of the last exception frame on
+ * the stack
+ */
+#define ARCH_HAS_OWN_IRQ_REGS
+
+#ifndef __ASSEMBLY__
+#define get_irq_regs() (__frame)
+#endif
+
+#endif /* _ASM_IRQ_REGS_H */
diff --git a/include/asm-mn10300/kdebug.h b/include/asm-mn10300/kdebug.h
new file mode 100644
index 000000000000..0f47e112190c
--- /dev/null
+++ b/include/asm-mn10300/kdebug.h
@@ -0,0 +1,22 @@
+/* MN10300 In-kernel death knells
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_KDEBUG_H
+#define _ASM_KDEBUG_H
+
+/* Grossly misnamed. */
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_BREAKPOINT,
+	DIE_GPF,
+};
+
+#endif /* _ASM_KDEBUG_H */
diff --git a/include/asm-mn10300/kmap_types.h b/include/asm-mn10300/kmap_types.h
new file mode 100644
index 000000000000..3398f9f35603
--- /dev/null
+++ b/include/asm-mn10300/kmap_types.h
@@ -0,0 +1,31 @@
+/* MN10300 kmap_atomic() slot IDs
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+enum km_type {
+	KM_BOUNCE_READ,
+	KM_SKB_SUNRPC_DATA,
+	KM_SKB_DATA_SOFTIRQ,
+	KM_USER0,
+	KM_USER1,
+	KM_BIO_SRC_IRQ,
+	KM_BIO_DST_IRQ,
+	KM_PTE0,
+	KM_PTE1,
+	KM_IRQ0,
+	KM_IRQ1,
+	KM_SOFTIRQ0,
+	KM_SOFTIRQ1,
+	KM_TYPE_NR
+};
+
+#endif /* _ASM_KMAP_TYPES_H */
diff --git a/include/asm-mn10300/kprobes.h b/include/asm-mn10300/kprobes.h
new file mode 100644
index 000000000000..c800b590183a
--- /dev/null
+++ b/include/asm-mn10300/kprobes.h
@@ -0,0 +1,50 @@
+/* MN10300 Kernel Probes support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by Mark Salter (msalter@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public Licence as published by
+ * the Free Software Foundation; either version 2 of the Licence, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public Licence for more details.
+ *
+ * You should have received a copy of the GNU General Public Licence
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#ifndef _ASM_KPROBES_H
+#define _ASM_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct kprobe;
+
+typedef unsigned char kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0xff
+#define MAX_INSN_SIZE 8
+#define MAX_STACK_SIZE 128
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/*  copy of original instruction
+	 */
+	kprobe_opcode_t insn[MAX_INSN_SIZE];
+};
+
+extern const int kretprobe_blacklist_size;
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+
+#define flush_insn_slot(p)  do {} while (0)
+
+extern void arch_remove_kprobe(struct kprobe *p);
+
+#endif /* _ASM_KPROBES_H */
diff --git a/include/asm-mn10300/linkage.h b/include/asm-mn10300/linkage.h
new file mode 100644
index 000000000000..29a32e467523
--- /dev/null
+++ b/include/asm-mn10300/linkage.h
@@ -0,0 +1,22 @@
+/* MN10300 Linkage and calling-convention overrides
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_LINKAGE_H
+#define _ASM_LINKAGE_H
+
+/* don't override anything */
+#define asmlinkage
+#define FASTCALL(x) x
+#define fastcall
+
+#define __ALIGN		.align 4,0xcb
+#define __ALIGN_STR	".align 4,0xcb"
+
+#endif
diff --git a/include/asm-mn10300/local.h b/include/asm-mn10300/local.h
new file mode 100644
index 000000000000..c11c530f74d0
--- /dev/null
+++ b/include/asm-mn10300/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/include/asm-mn10300/mc146818rtc.h b/include/asm-mn10300/mc146818rtc.h
new file mode 100644
index 000000000000..df6bc6e0e8c6
--- /dev/null
+++ b/include/asm-mn10300/mc146818rtc.h
@@ -0,0 +1 @@
+#include <asm/rtc-regs.h>
diff --git a/include/asm-mn10300/mman.h b/include/asm-mn10300/mman.h
new file mode 100644
index 000000000000..b7986b65addf
--- /dev/null
+++ b/include/asm-mn10300/mman.h
@@ -0,0 +1,28 @@
+/* MN10300 Constants for mmap and co.
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * - Derived from asm-x86/mman.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_MMAN_H
+#define _ASM_MMAN_H
+
+#include <asm-generic/mman.h>
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_LOCKED	0x2000		/* pages are locked */
+#define MAP_NORESERVE	0x4000		/* don't check for reservations */
+#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+
+#endif /* _ASM_MMAN_H */
diff --git a/include/asm-mn10300/mmu.h b/include/asm-mn10300/mmu.h
new file mode 100644
index 000000000000..2d2d097e7309
--- /dev/null
+++ b/include/asm-mn10300/mmu.h
@@ -0,0 +1,19 @@
+/* MN10300 Memory management context
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from include/asm-frv/mmu.h
+ */
+
+#ifndef _ASM_MMU_H
+#define _ASM_MMU_H
+
+/*
+ * MMU context
+ */
+typedef struct {
+	unsigned long	tlbpid[NR_CPUS];	/* TLB PID for this process on
+						 * each CPU */
+} mm_context_t;
+
+#endif /* _ASM_MMU_H */
diff --git a/include/asm-mn10300/mmu_context.h b/include/asm-mn10300/mmu_context.h
new file mode 100644
index 000000000000..a9e2e34f69b0
--- /dev/null
+++ b/include/asm-mn10300/mmu_context.h
@@ -0,0 +1,138 @@
+/* MN10300 MMU context management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ * - Derived from include/asm-m32r/mmu_context.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ *
+ *
+ * This implements an algorithm to provide TLB PID mappings to provide
+ * selective access to the TLB for processes, thus reducing the number of TLB
+ * flushes required.
+ *
+ * Note, however, that the M32R algorithm is technically broken as it does not
+ * handle version wrap-around, and could, theoretically, have a problem with a
+ * very long lived program that sleeps long enough for the version number to
+ * wrap all the way around so that its TLB mappings appear valid once again.
+ */
+#ifndef _ASM_MMU_CONTEXT_H
+#define _ASM_MMU_CONTEXT_H
+
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm-generic/mm_hooks.h>
+
+#define MMU_CONTEXT_TLBPID_MASK		0x000000ffUL
+#define MMU_CONTEXT_VERSION_MASK	0xffffff00UL
+#define MMU_CONTEXT_FIRST_VERSION	0x00000100UL
+#define MMU_NO_CONTEXT			0x00000000UL
+
+extern unsigned long mmu_context_cache[NR_CPUS];
+#define mm_context(mm)	(mm->context.tlbpid[smp_processor_id()])
+
+#define enter_lazy_tlb(mm, tsk)	do {} while (0)
+
+#ifdef CONFIG_SMP
+#define cpu_ran_vm(cpu, task) \
+	cpu_set((cpu), (task)->cpu_vm_mask)
+#define cpu_maybe_ran_vm(cpu, task) \
+	cpu_test_and_set((cpu), (task)->cpu_vm_mask)
+#else
+#define cpu_ran_vm(cpu, task)		do {} while (0)
+#define cpu_maybe_ran_vm(cpu, task)	true
+#endif /* CONFIG_SMP */
+
+/*
+ * allocate an MMU context
+ */
+static inline unsigned long allocate_mmu_context(struct mm_struct *mm)
+{
+	unsigned long *pmc = &mmu_context_cache[smp_processor_id()];
+	unsigned long mc = ++(*pmc);
+
+	if (!(mc & MMU_CONTEXT_TLBPID_MASK)) {
+		/* we exhausted the TLB PIDs of this version on this CPU, so we
+		 * flush this CPU's TLB in its entirety and start new cycle */
+		flush_tlb_all();
+
+		/* fix the TLB version if needed (we avoid version #0 so as to
+		 * distingush MMU_NO_CONTEXT) */
+		if (!mc)
+			*pmc = mc = MMU_CONTEXT_FIRST_VERSION;
+	}
+	mm_context(mm) = mc;
+	return mc;
+}
+
+/*
+ * get an MMU context if one is needed
+ */
+static inline unsigned long get_mmu_context(struct mm_struct *mm)
+{
+	unsigned long mc = MMU_NO_CONTEXT, cache;
+
+	if (mm) {
+		cache = mmu_context_cache[smp_processor_id()];
+		mc = mm_context(mm);
+
+		/* if we have an old version of the context, replace it */
+		if ((mc ^ cache) & MMU_CONTEXT_VERSION_MASK)
+			mc = allocate_mmu_context(mm);
+	}
+	return mc;
+}
+
+/*
+ * initialise the context related info for a new mm_struct instance
+ */
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	int num_cpus = NR_CPUS, i;
+
+	for (i = 0; i < num_cpus; i++)
+		mm->context.tlbpid[i] = MMU_NO_CONTEXT;
+	return 0;
+}
+
+/*
+ * destroy context related info for an mm_struct that is about to be put to
+ * rest
+ */
+#define destroy_context(mm)	do { } while (0)
+
+/*
+ * after we have set current->mm to a new value, this activates the context for
+ * the new mm so we see the new mappings.
+ */
+static inline void activate_context(struct mm_struct *mm, int cpu)
+{
+	PIDR = get_mmu_context(mm) & MMU_CONTEXT_TLBPID_MASK;
+}
+
+/*
+ * change between virtual memory sets
+ */
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	int cpu = smp_processor_id();
+
+	if (prev != next) {
+		cpu_ran_vm(cpu, next);
+		activate_context(next, cpu);
+		PTBR = (unsigned long) next->pgd;
+	} else if (!cpu_maybe_ran_vm(cpu, next)) {
+		activate_context(next, cpu);
+	}
+}
+
+#define deactivate_mm(tsk, mm)	do {} while (0)
+#define activate_mm(prev, next)	switch_mm((prev), (next), NULL)
+
+#endif /* _ASM_MMU_CONTEXT_H */
diff --git a/include/asm-mn10300/module.h b/include/asm-mn10300/module.h
new file mode 100644
index 000000000000..5d7057d01494
--- /dev/null
+++ b/include/asm-mn10300/module.h
@@ -0,0 +1,27 @@
+/* MN10300 Arch-specific module definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by Mark Salter (msalter@redhat.com)
+ * Derived from include/asm-i386/module.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_MODULE_H
+#define _ASM_MODULE_H
+
+struct mod_arch_specific {
+};
+
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define Elf_Ehdr	Elf32_Ehdr
+
+/*
+ * Include the MN10300 architecture version.
+ */
+#define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " "
+
+#endif /* _ASM_MODULE_H */
diff --git a/include/asm-mn10300/msgbuf.h b/include/asm-mn10300/msgbuf.h
new file mode 100644
index 000000000000..8b602450cc4a
--- /dev/null
+++ b/include/asm-mn10300/msgbuf.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_MSGBUF_H
+#define _ASM_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for MN10300 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm	msg_perm;
+	__kernel_time_t		msg_stime;	/* last msgsnd time */
+	unsigned long		__unused1;
+	__kernel_time_t		msg_rtime;	/* last msgrcv time */
+	unsigned long		__unused2;
+	__kernel_time_t		msg_ctime;	/* last change time */
+	unsigned long		__unused3;
+	unsigned long		msg_cbytes;	/* current number of bytes on queue */
+	unsigned long		msg_qnum;	/* number of messages in queue */
+	unsigned long		msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t		msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t		msg_lrpid;	/* last receive pid */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
+};
+
+#endif /* _ASM_MSGBUF_H */
diff --git a/include/asm-mn10300/mutex.h b/include/asm-mn10300/mutex.h
new file mode 100644
index 000000000000..84f5490c6fb4
--- /dev/null
+++ b/include/asm-mn10300/mutex.h
@@ -0,0 +1,16 @@
+/* MN10300 Mutex fastpath
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ *
+ *
+ * TODO: implement optimized primitives instead, or leave the generic
+ * implementation in place, or pick the atomic_xchg() based generic
+ * implementation. (see asm-generic/mutex-xchg.h for details)
+ */
+#include <asm-generic/mutex-null.h>
diff --git a/include/asm-mn10300/namei.h b/include/asm-mn10300/namei.h
new file mode 100644
index 000000000000..bd9ce94aeb65
--- /dev/null
+++ b/include/asm-mn10300/namei.h
@@ -0,0 +1,22 @@
+/* Emulation stuff
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_NAMEI_H
+#define _ASM_NAMEI_H
+
+/* This dummy routine maybe changed to something useful
+ * for /usr/gnemul/ emulation stuff.
+ * Look at asm-sparc/namei.h for details.
+ */
+
+#define __emul_prefix() NULL
+
+#endif /* _ASM_NAMEI_H */
diff --git a/include/asm-mn10300/nmi.h b/include/asm-mn10300/nmi.h
new file mode 100644
index 000000000000..f3671cbbc117
--- /dev/null
+++ b/include/asm-mn10300/nmi.h
@@ -0,0 +1,14 @@
+/* MN10300 NMI handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_NMI_H
+#define _ASM_NMI_H
+
+#endif /* _ASM_NMI_H */
diff --git a/include/asm-mn10300/page.h b/include/asm-mn10300/page.h
new file mode 100644
index 000000000000..124971b9fb9b
--- /dev/null
+++ b/include/asm-mn10300/page.h
@@ -0,0 +1,131 @@
+/* MN10300 Page table definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PAGE_H
+#define _ASM_PAGE_H
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	12
+
+#ifndef __ASSEMBLY__
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE - 1))
+#else
+#define PAGE_SIZE	+(1 << PAGE_SHIFT)	/* unary plus marks an
+						 * immediate val not an addr */
+#define PAGE_MASK	+(~(PAGE_SIZE - 1))
+#endif
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
+#define copy_page(to, from)	memcpy((void *)(to), (void *)(from), PAGE_SIZE)
+
+#define clear_user_page(addr, vaddr, page)	clear_page(addr)
+#define copy_user_page(vto, vfrom, vaddr, to)	copy_page(vto, vfrom)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
+
+#define PTE_MASK	PAGE_MASK
+#define HPAGE_SHIFT	22
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#endif
+
+#define pte_val(x)	((x).pte)
+#define pgd_val(x)	((x).pgd)
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x)	((pte_t) { (x) })
+#define __pgd(x)	((pgd_t) { (x) })
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+#include <asm-generic/pgtable-nopmd.h>
+
+#endif /* !__ASSEMBLY__ */
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+
+/*
+ * This handles the memory map.. We could make this a config
+ * option, but too many people screw it up, and too few need
+ * it.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB.
+ */
+
+#ifndef __ASSEMBLY__
+
+/* Pure 2^n version of get_order */
+static inline int get_order(unsigned long size) __attribute__((const));
+static inline int get_order(unsigned long size)
+{
+	int order;
+
+	size = (size - 1) >> (PAGE_SHIFT - 1);
+	order = -1;
+	do {
+		size >>= 1;
+		order++;
+	} while (size);
+	return order;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#include <asm/page_offset.h>
+
+#define __PAGE_OFFSET		(PAGE_OFFSET_RAW)
+#define PAGE_OFFSET		((unsigned long) __PAGE_OFFSET)
+
+/*
+ * main RAM and kernel working space are coincident at 0x90000000, but to make
+ * life more interesting, there's also an uncached virtual shadow at 0xb0000000
+ * - these mappings are fixed in the MMU
+ */
+#define __pfn_disp		(CONFIG_KERNEL_RAM_BASE_ADDRESS >> PAGE_SHIFT)
+
+#define __pa(x)			((unsigned long)(x))
+#define __va(x)			((void *)(unsigned long)(x))
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+#define pfn_to_page(pfn)	(mem_map + ((pfn) - __pfn_disp))
+#define page_to_pfn(page)	((unsigned long)((page) - mem_map) + __pfn_disp)
+
+#define pfn_valid(pfn)					\
+({							\
+	unsigned long __pfn = (pfn) - __pfn_disp;	\
+	__pfn < max_mapnr;				\
+})
+
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | \
+	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_PAGE_H */
diff --git a/include/asm-mn10300/page_offset.h b/include/asm-mn10300/page_offset.h
new file mode 100644
index 000000000000..8eb5b16ad86b
--- /dev/null
+++ b/include/asm-mn10300/page_offset.h
@@ -0,0 +1,11 @@
+/* MN10300 Kernel base address
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+#ifndef _ASM_PAGE_OFFSET_H
+#define _ASM_PAGE_OFFSET_H
+
+#define PAGE_OFFSET_RAW CONFIG_KERNEL_RAM_BASE_ADDRESS
+
+#endif
diff --git a/include/asm-mn10300/param.h b/include/asm-mn10300/param.h
new file mode 100644
index 000000000000..54b883ec3906
--- /dev/null
+++ b/include/asm-mn10300/param.h
@@ -0,0 +1,34 @@
+/* MN10300 Kernel parameters
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PARAM_H
+#define _ASM_PARAM_H
+
+#ifdef __KERNEL__
+#define HZ		1000		/* Internal kernel timer frequency */
+#define USER_HZ		100		/* .. some user interfaces are in
+					 * "ticks" */
+#define CLOCKS_PER_SEC	(USER_HZ)	/* like times() */
+#endif
+
+#ifndef HZ
+#define HZ		100
+#endif
+
+#define EXEC_PAGESIZE	4096
+
+#ifndef NOGROUP
+#define NOGROUP		(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+#define COMMAND_LINE_SIZE 256
+
+#endif /* _ASM_PARAM_H */
diff --git a/include/asm-mn10300/pci.h b/include/asm-mn10300/pci.h
new file mode 100644
index 000000000000..205192c52bb5
--- /dev/null
+++ b/include/asm-mn10300/pci.h
@@ -0,0 +1,133 @@
+/* MN10300 PCI definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PCI_H
+#define _ASM_PCI_H
+
+#ifdef __KERNEL__
+#include <linux/mm.h>		/* for struct page */
+
+#if 0
+#define __pcbdebug(FMT, ADDR, ...) \
+	printk(KERN_DEBUG "PCIBRIDGE[%08x]: "FMT"\n", \
+	       (u32)(ADDR), ##__VA_ARGS__)
+
+#define __pcidebug(FMT, BUS, DEVFN, WHERE,...)		\
+do {							\
+	printk(KERN_DEBUG "PCI[%02x:%02x.%x + %02x]: "FMT"\n",	\
+	       (BUS)->number,					\
+	       PCI_SLOT(DEVFN),					\
+	       PCI_FUNC(DEVFN),					\
+	       (u32)(WHERE), ##__VA_ARGS__);			\
+} while (0)
+
+#else
+#define __pcbdebug(FMT, ADDR, ...)		do {} while (0)
+#define __pcidebug(FMT, BUS, DEVFN, WHERE, ...)	do {} while (0)
+#endif
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ * already-configured bus numbers - to be used for buggy BIOSes or
+ * architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+#define pcibios_assign_all_busses()	1
+extern void unit_pci_init(void);
+#else
+#define pcibios_assign_all_busses()	0
+#endif
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO		0xBE000004
+#define PCIBIOS_MIN_MEM		0xB8000000
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The PCI address space does equal the physical memory
+ * address space.  The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS	(1)
+
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask)	(0)
+
+/*
+ * These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#define sg_dma_len(sg)		((sg)->length)
+
+/* Return the index of the PCI controller for device. */
+static inline int pci_controller_num(struct pci_dev *dev)
+{
+	return 0;
+}
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_state,
+			       int write_combine);
+
+#endif /* __KERNEL__ */
+
+/* implement the pci_ DMA API in terms of the generic device dma_ one */
+#include <asm-generic/pci-dma-compat.h>
+
+/**
+ * pcibios_resource_to_bus - convert resource to PCI bus address
+ * @dev: device which owns this resource
+ * @region: converted bus-centric region (start,end)
+ * @res: resource to convert
+ *
+ * Convert a resource to a PCI device bus address or bus window.
+ */
+extern void pcibios_resource_to_bus(struct pci_dev *dev,
+				    struct pci_bus_region *region,
+				    struct resource *res);
+
+extern void pcibios_bus_to_resource(struct pci_dev *dev,
+				    struct resource *res,
+				    struct pci_bus_region *region);
+
+static inline struct resource *
+pcibios_select_root(struct pci_dev *pdev, struct resource *res)
+{
+	struct resource *root = NULL;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &ioport_resource;
+	if (res->flags & IORESOURCE_MEM)
+		root = &iomem_resource;
+
+	return root;
+}
+
+#define pcibios_scan_all_fns(a, b)	0
+
+#endif /* _ASM_PCI_H */
diff --git a/include/asm-mn10300/percpu.h b/include/asm-mn10300/percpu.h
new file mode 100644
index 000000000000..06a959d67234
--- /dev/null
+++ b/include/asm-mn10300/percpu.h
@@ -0,0 +1 @@
+#include <asm-generic/percpu.h>
diff --git a/include/asm-mn10300/pgalloc.h b/include/asm-mn10300/pgalloc.h
new file mode 100644
index 000000000000..ec057e1bd4cf
--- /dev/null
+++ b/include/asm-mn10300/pgalloc.h
@@ -0,0 +1,56 @@
+/* MN10300 Page and page table/directory allocation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PGALLOC_H
+#define _ASM_PGALLOC_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <linux/threads.h>
+#include <linux/mm.h>		/* for struct page */
+
+struct mm_struct;
+struct page;
+
+/* attach a page table to a PMD entry */
+#define pmd_populate_kernel(mm, pmd, pte) \
+	set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE))
+
+static inline
+void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+{
+	set_pmd(pmd, __pmd((page_to_pfn(pte) << PAGE_SHIFT) | _PAGE_TABLE));
+}
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+/*
+ * Allocate and free page tables.
+ */
+
+extern pgd_t *pgd_alloc(struct mm_struct *);
+extern void pgd_free(struct mm_struct *, pgd_t *);
+
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
+extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long) pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
+{
+	__free_page(pte);
+}
+
+
+#define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte))
+
+#endif /* _ASM_PGALLOC_H */
diff --git a/include/asm-mn10300/pgtable.h b/include/asm-mn10300/pgtable.h
new file mode 100644
index 000000000000..375c4941deda
--- /dev/null
+++ b/include/asm-mn10300/pgtable.h
@@ -0,0 +1,489 @@
+/* MN10300 Page table manipulators and constants
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ *
+ *
+ * The Linux memory management assumes a three-level page table setup. On
+ * the i386, we use that, but "fold" the mid level into the top-level page
+ * table, so that we physically have the same two-level page table as the
+ * i386 mmu expects.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the i386 page table tree for the purposes of the MN10300 TLB handler
+ * functions.
+ */
+#ifndef _ASM_PGTABLE_H
+#define _ASM_PGTABLE_H
+
+#include <asm/cpu-regs.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <linux/threads.h>
+
+#include <asm/bitops.h>
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern unsigned long empty_zero_page[1024];
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+
+extern void pmd_ctor(void *, struct kmem_cache *, unsigned long);
+extern void pgtable_cache_init(void);
+extern void paging_init(void);
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * The Linux mn10300 paging architecture only implements both the traditional
+ * 2-level page tables
+ */
+#define PGDIR_SHIFT	22
+#define PTRS_PER_PGD	1024
+#define PTRS_PER_PUD	1	/* we don't really have any PUD physically */
+#define PTRS_PER_PMD	1	/* we don't really have any PMD physically */
+#define PTRS_PER_PTE	1024
+
+#define PGD_SIZE	PAGE_SIZE
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE - 1))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_ADDRESS	0
+
+#define USER_PGD_PTRS		(PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS		(PTRS_PER_PGD - USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT	22
+#define BOOT_USER_PGD_PTRS	(__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS	(1024 - BOOT_USER_PGD_PTRS)
+
+#ifndef __ASSEMBLY__
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+#endif
+
+/*
+ * Unfortunately, due to the way the MMU works on the MN10300, the vmalloc VM
+ * area has to be in the lower half of the virtual address range (the upper
+ * half is not translated through the TLB).
+ *
+ * So in this case, the vmalloc area goes at the bottom of the address map
+ * (leaving a hole at the very bottom to catch addressing errors), and
+ * userspace starts immediately above.
+ *
+ * The vmalloc() routines also leaves a hole of 4kB between each vmalloced
+ * area to catch addressing errors.
+ */
+#define VMALLOC_OFFSET	(8 * 1024 * 1024)
+#define VMALLOC_START	(0x70000000)
+#define VMALLOC_END	(0x7C000000)
+
+#ifndef __ASSEMBLY__
+extern pte_t kernel_vmalloc_ptes[(VMALLOC_END - VMALLOC_START) / PAGE_SIZE];
+#endif
+
+/* IPTEL/DPTEL bit assignments */
+#define _PAGE_BIT_VALID		xPTEL_V_BIT
+#define _PAGE_BIT_ACCESSED	xPTEL_UNUSED1_BIT	/* mustn't be loaded into IPTEL/DPTEL */
+#define _PAGE_BIT_NX		xPTEL_UNUSED2_BIT	/* mustn't be loaded into IPTEL/DPTEL */
+#define _PAGE_BIT_CACHE		xPTEL_C_BIT
+#define _PAGE_BIT_PRESENT	xPTEL_PV_BIT
+#define _PAGE_BIT_DIRTY		xPTEL_D_BIT
+#define _PAGE_BIT_GLOBAL	xPTEL_G_BIT
+
+#define _PAGE_VALID		xPTEL_V
+#define _PAGE_ACCESSED		xPTEL_UNUSED1
+#define _PAGE_NX		xPTEL_UNUSED2		/* no-execute bit */
+#define _PAGE_CACHE		xPTEL_C
+#define _PAGE_PRESENT		xPTEL_PV
+#define _PAGE_DIRTY		xPTEL_D
+#define _PAGE_PROT		xPTEL_PR
+#define _PAGE_PROT_RKNU		xPTEL_PR_ROK
+#define _PAGE_PROT_WKNU		xPTEL_PR_RWK
+#define _PAGE_PROT_RKRU		xPTEL_PR_ROK_ROU
+#define _PAGE_PROT_WKRU		xPTEL_PR_RWK_ROU
+#define _PAGE_PROT_WKWU		xPTEL_PR_RWK_RWU
+#define _PAGE_GLOBAL		xPTEL_G
+#define _PAGE_PSE		xPTEL_PS_4Mb		/* 4MB page */
+
+#define _PAGE_FILE		xPTEL_UNUSED1_BIT	/* set:pagecache unset:swap */
+
+#define __PAGE_PROT_UWAUX	0x040
+#define __PAGE_PROT_USER	0x080
+#define __PAGE_PROT_WRITE	0x100
+
+#define _PAGE_PRESENTV		(_PAGE_PRESENT|_PAGE_VALID)
+#define _PAGE_PROTNONE		0x000	/* If not present */
+
+#ifndef __ASSEMBLY__
+
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+
+#define _PAGE_TABLE	(_PAGE_PRESENTV | _PAGE_PROT_WKNU | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define __PAGE_NONE	(_PAGE_PRESENTV | _PAGE_PROT_RKNU | _PAGE_ACCESSED | _PAGE_CACHE)
+#define __PAGE_SHARED	(_PAGE_PRESENTV | _PAGE_PROT_WKWU | _PAGE_ACCESSED | _PAGE_CACHE)
+#define __PAGE_COPY	(_PAGE_PRESENTV | _PAGE_PROT_RKRU | _PAGE_ACCESSED | _PAGE_CACHE)
+#define __PAGE_READONLY	(_PAGE_PRESENTV | _PAGE_PROT_RKRU | _PAGE_ACCESSED | _PAGE_CACHE)
+
+#define PAGE_NONE		__pgprot(__PAGE_NONE     | _PAGE_NX)
+#define PAGE_SHARED_NOEXEC	__pgprot(__PAGE_SHARED   | _PAGE_NX)
+#define PAGE_COPY_NOEXEC	__pgprot(__PAGE_COPY     | _PAGE_NX)
+#define PAGE_READONLY_NOEXEC	__pgprot(__PAGE_READONLY | _PAGE_NX)
+#define PAGE_SHARED_EXEC	__pgprot(__PAGE_SHARED)
+#define PAGE_COPY_EXEC		__pgprot(__PAGE_COPY)
+#define PAGE_READONLY_EXEC	__pgprot(__PAGE_READONLY)
+#define PAGE_COPY		PAGE_COPY_NOEXEC
+#define PAGE_READONLY		PAGE_READONLY_NOEXEC
+#define PAGE_SHARED		PAGE_SHARED_EXEC
+
+#define __PAGE_KERNEL_BASE (_PAGE_PRESENTV | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
+
+#define __PAGE_KERNEL		(__PAGE_KERNEL_BASE | _PAGE_PROT_WKNU | _PAGE_CACHE | _PAGE_NX)
+#define __PAGE_KERNEL_NOCACHE	(__PAGE_KERNEL_BASE | _PAGE_PROT_WKNU | _PAGE_NX)
+#define __PAGE_KERNEL_EXEC	(__PAGE_KERNEL & ~_PAGE_NX)
+#define __PAGE_KERNEL_RO	(__PAGE_KERNEL_BASE | _PAGE_PROT_RKNU | _PAGE_CACHE | _PAGE_NX)
+#define __PAGE_KERNEL_LARGE	(__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+
+#define PAGE_KERNEL		__pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC)
+
+/*
+ * Whilst the MN10300 can do page protection for execute (given separate data
+ * and insn TLBs), we are not supporting it at the moment. Write permission,
+ * however, always implies read permission (but not execute permission).
+ */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY_NOEXEC
+#define __P010	PAGE_COPY_NOEXEC
+#define __P011	PAGE_COPY_NOEXEC
+#define __P100	PAGE_READONLY_EXEC
+#define __P101	PAGE_READONLY_EXEC
+#define __P110	PAGE_COPY_EXEC
+#define __P111	PAGE_COPY_EXEC
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY_NOEXEC
+#define __S010	PAGE_SHARED_NOEXEC
+#define __S011	PAGE_SHARED_NOEXEC
+#define __S100	PAGE_READONLY_EXEC
+#define __S101	PAGE_READONLY_EXEC
+#define __S110	PAGE_SHARED_EXEC
+#define __S111	PAGE_SHARED_EXEC
+
+/*
+ * Define this to warn about kernel memory accesses that are
+ * done without a 'verify_area(VERIFY_WRITE,..)'
+ */
+#undef TEST_VERIFY_AREA
+
+#define pte_present(x)	(pte_val(x) & _PAGE_VALID)
+#define pte_clear(mm, addr, xp)				\
+do {							\
+	set_pte_at((mm), (addr), (xp), __pte(0));	\
+} while (0)
+
+#define pmd_none(x)	(!pmd_val(x))
+#define pmd_present(x)	(!pmd_none(x))
+#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+#define	pmd_bad(x)	0
+
+
+#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_user(pte_t pte)	{ return pte_val(pte) & __PAGE_PROT_USER; }
+static inline int pte_read(pte_t pte)	{ return pte_val(pte) & __PAGE_PROT_USER; }
+static inline int pte_dirty(pte_t pte)	{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)	{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)	{ return pte_val(pte) & __PAGE_PROT_WRITE; }
+
+/*
+ * The following only works if pte_present() is not true.
+ */
+static inline int pte_file(pte_t pte)	{ return pte_val(pte) & _PAGE_FILE; }
+
+static inline pte_t pte_rdprotect(pte_t pte)
+{
+	pte_val(pte) &= ~(__PAGE_PROT_USER|__PAGE_PROT_UWAUX); return pte;
+}
+static inline pte_t pte_exprotect(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_NX; return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	pte_val(pte) &= ~(__PAGE_PROT_WRITE|__PAGE_PROT_UWAUX); return pte;
+}
+
+static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkexec(pte_t pte)	{ pte_val(pte) &= ~_PAGE_NX; return pte; }
+
+static inline pte_t pte_mkread(pte_t pte)
+{
+	pte_val(pte) |= __PAGE_PROT_USER;
+	if (pte_write(pte))
+		pte_val(pte) |= __PAGE_PROT_UWAUX;
+	return pte;
+}
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	pte_val(pte) |= __PAGE_PROT_WRITE;
+	if (pte_val(pte) & __PAGE_PROT_USER)
+		pte_val(pte) |= __PAGE_PROT_UWAUX;
+	return pte;
+}
+
+#define pte_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pte %08lx.\n", \
+	       __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
+	       __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+#define pgd_clear(xp)				do { } while (0)
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval)			(*(pteptr) = pteval)
+#define set_pte_at(mm, addr, ptep, pteval)	set_pte((ptep), (pteval))
+#define set_pte_atomic(pteptr, pteval)		set_pte((pteptr), (pteval))
+
+/*
+ * (pmds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
+
+#define ptep_get_and_clear(mm, addr, ptep) \
+	__pte(xchg(&(ptep)->pte, 0))
+#define pte_same(a, b)		(pte_val(a) == pte_val(b))
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+#define pte_none(x)		(!pte_val(x))
+#define pte_pfn(x)		((unsigned long) (pte_val(x) >> PAGE_SHIFT))
+#define __pfn_addr(pfn)		((pfn) << PAGE_SHIFT)
+#define pfn_pte(pfn, prot)	__pte(__pfn_addr(pfn) | pgprot_val(prot))
+#define pfn_pmd(pfn, prot)	__pmd(__pfn_addr(pfn) | pgprot_val(prot))
+
+/*
+ * All present user pages are user-executable:
+ */
+static inline int pte_exec(pte_t pte)
+{
+	return pte_user(pte);
+}
+
+/*
+ * All present pages are kernel-executable:
+ */
+static inline int pte_exec_kernel(pte_t pte)
+{
+	return 1;
+}
+
+/*
+ * Bits 0 and 1 are taken, split up the 29 bits of offset
+ * into this range:
+ */
+#define PTE_FILE_MAX_BITS	29
+
+#define pte_to_pgoff(pte)	(pte_val(pte) >> 2)
+#define pgoff_to_pte(off)	__pte((off) << 2 | _PAGE_FILE)
+
+/* Encode and de-code a swap entry */
+#define __swp_type(x)			(((x).val >> 2) & 0x3f)
+#define __swp_offset(x)			((x).val >> 8)
+#define __swp_entry(type, offset) \
+	((swp_entry_t) { ((type) << 2) | ((offset) << 8) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		__pte((x).val)
+
+static inline
+int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr,
+			      pte_t *ptep)
+{
+	if (!pte_dirty(*ptep))
+		return 0;
+	return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
+}
+
+static inline
+int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr,
+			      pte_t *ptep)
+{
+	if (!pte_young(*ptep))
+		return 0;
+	return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte);
+}
+
+static inline
+void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_val(*ptep) &= ~(__PAGE_PROT_WRITE|__PAGE_PROT_UWAUX);
+}
+
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+	set_bit(_PAGE_BIT_DIRTY, &ptep->pte);
+}
+
+/*
+ * Macro to mark a page protection value as "uncacheable".  On processors which
+ * do not support it, this is a no-op.
+ */
+#define pgprot_noncached(prot)	__pgprot(pgprot_val(prot) | _PAGE_CACHE)
+
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+#define mk_pte_huge(entry) \
+	((entry).pte |= _PAGE_PRESENT | _PAGE_PSE | _PAGE_VALID)
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) &= _PAGE_CHG_MASK;
+	pte_val(pte) |= pgprot_val(newprot);
+	return pte;
+}
+
+#define page_pte(page)	page_pte_prot((page), __pgprot(0))
+
+#define pmd_page_kernel(pmd) \
+	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+#define pmd_page(pmd)	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+
+#define pmd_large(pmd) \
+	((pmd_val(pmd) & (_PAGE_PSE | _PAGE_PRESENT)) == \
+	 (_PAGE_PSE | _PAGE_PRESENT))
+
+/*
+ * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * this macro returns the index of the entry in the pgd page which would
+ * control the given virtual address
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
+ */
+#define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))
+
+/*
+ * a shortcut which implies the use of the kernel's pgd, instead
+ * of a process's
+ */
+#define pgd_offset_k(address)	pgd_offset(&init_mm, address)
+
+/*
+ * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
+ *
+ * this macro returns the index of the entry in the pmd page which would
+ * control the given virtual address
+ */
+#define pmd_index(address) \
+	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
+/*
+ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
+ *
+ * this macro returns the index of the entry in the pte page which would
+ * control the given virtual address
+ */
+#define pte_index(address) \
+	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_kernel(dir, address) \
+	((pte_t *) pmd_page_kernel(*(dir)) +  pte_index(address))
+
+/*
+ * Make a given kernel text page executable/non-executable.
+ * Returns the previous executability setting of that page (which
+ * is used to restore the previous state). Used by the SMP bootup code.
+ * NOTE: this is an __init function for security reasons.
+ */
+static inline int set_kernel_exec(unsigned long vaddr, int enable)
+{
+	return 0;
+}
+
+#define pte_offset_map(dir, address) \
+	((pte_t *) page_address(pmd_page(*(dir))) + pte_index(address))
+#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
+#define pte_unmap(pte)		do {} while (0)
+#define pte_unmap_nested(pte)	do {} while (0)
+
+/*
+ * The MN10300 has external MMU info in the form of a TLB: this is adapted from
+ * the kernel page tables containing the necessary information by tlb-mn10300.S
+ */
+extern void update_mmu_cache(struct vm_area_struct *vma,
+			     unsigned long address, pte_t pte);
+
+#endif /* !__ASSEMBLY__ */
+
+#define kern_addr_valid(addr)	(1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+	remap_pfn_range((vma), (vaddr), (pfn), (size), (prot))
+
+#define MK_IOSPACE_PFN(space, pfn)	(pfn)
+#define GET_IOSPACE(pfn)		0
+#define GET_PFN(pfn)			(pfn)
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTEP_MKDIRTY
+#define __HAVE_ARCH_PTE_SAME
+#include <asm-generic/pgtable.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_PGTABLE_H */
diff --git a/include/asm-mn10300/pio-regs.h b/include/asm-mn10300/pio-regs.h
new file mode 100644
index 000000000000..96bc8182d0ba
--- /dev/null
+++ b/include/asm-mn10300/pio-regs.h
@@ -0,0 +1,233 @@
+/* MN10300 On-board I/O port module registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PIO_REGS_H
+#define _ASM_PIO_REGS_H
+
+#include <asm/cpu-regs.h>
+#include <asm/intctl-regs.h>
+
+#ifdef __KERNEL__
+
+/* I/O port 0 */
+#define	P0MD			__SYSREG(0xdb000000, u16)	/* mode reg */
+#define P0MD_0			0x0003	/* mask */
+#define P0MD_0_IN		0x0000	/* input mode */
+#define P0MD_0_OUT		0x0001	/* output mode */
+#define P0MD_0_TM0IO		0x0002	/* timer 0 I/O mode */
+#define P0MD_0_EYECLK		0x0003	/* test signal output (clock) */
+#define P0MD_1			0x000c
+#define P0MD_1_IN		0x0000
+#define P0MD_1_OUT		0x0004
+#define P0MD_1_TM1IO		0x0008	/* timer 1 I/O mode */
+#define P0MD_1_EYED		0x000c	/* test signal output (data) */
+#define P0MD_2			0x0030
+#define P0MD_2_IN		0x0000
+#define P0MD_2_OUT		0x0010
+#define P0MD_2_TM2IO		0x0020	/* timer 2 I/O mode */
+#define P0MD_3			0x00c0
+#define P0MD_3_IN		0x0000
+#define P0MD_3_OUT		0x0040
+#define P0MD_3_TM3IO		0x0080	/* timer 3 I/O mode */
+#define P0MD_4			0x0300
+#define P0MD_4_IN		0x0000
+#define P0MD_4_OUT		0x0100
+#define P0MD_4_TM4IO		0x0200	/* timer 4 I/O mode */
+#define P0MD_4_XCTS		0x0300	/* XCTS input for serial port 2 */
+#define P0MD_5			0x0c00
+#define P0MD_5_IN		0x0000
+#define P0MD_5_OUT		0x0400
+#define P0MD_5_TM5IO		0x0800	/* timer 5 I/O mode */
+#define P0MD_6			0x3000
+#define P0MD_6_IN		0x0000
+#define P0MD_6_OUT		0x1000
+#define P0MD_6_TM6IOA		0x2000	/* timer 6 I/O mode A */
+#define P0MD_7			0xc000
+#define P0MD_7_IN		0x0000
+#define P0MD_7_OUT		0x4000
+#define P0MD_7_TM6IOB		0x8000	/* timer 6 I/O mode B */
+
+#define	P0IN			__SYSREG(0xdb000004, u8)	/* in reg */
+#define	P0OUT			__SYSREG(0xdb000008, u8)	/* out reg */
+
+#define	P0TMIO			__SYSREG(0xdb00000c, u8)	/* TM pin I/O control reg */
+#define P0TMIO_TM0_IN		0x00
+#define P0TMIO_TM0_OUT		0x01
+#define P0TMIO_TM1_IN		0x00
+#define P0TMIO_TM1_OUT		0x02
+#define P0TMIO_TM2_IN		0x00
+#define P0TMIO_TM2_OUT		0x04
+#define P0TMIO_TM3_IN		0x00
+#define P0TMIO_TM3_OUT		0x08
+#define P0TMIO_TM4_IN		0x00
+#define P0TMIO_TM4_OUT		0x10
+#define P0TMIO_TM5_IN		0x00
+#define P0TMIO_TM5_OUT		0x20
+#define P0TMIO_TM6A_IN		0x00
+#define P0TMIO_TM6A_OUT		0x40
+#define P0TMIO_TM6B_IN		0x00
+#define P0TMIO_TM6B_OUT		0x80
+
+/* I/O port 1 */
+#define	P1MD			__SYSREG(0xdb000100, u16)	/* mode reg */
+#define P1MD_0			0x0003	/* mask */
+#define P1MD_0_IN		0x0000	/* input mode */
+#define P1MD_0_OUT		0x0001	/* output mode */
+#define P1MD_0_TM7IO		0x0002	/* timer 7 I/O mode */
+#define P1MD_0_ADTRG		0x0003	/* A/D converter trigger mode */
+#define P1MD_1			0x000c
+#define P1MD_1_IN		0x0000
+#define P1MD_1_OUT		0x0004
+#define P1MD_1_TM8IO		0x0008	/* timer 8 I/O mode */
+#define P1MD_1_XDMR0		0x000c	/* DMA request input 0 mode */
+#define P1MD_2			0x0030
+#define P1MD_2_IN		0x0000
+#define P1MD_2_OUT		0x0010
+#define P1MD_2_TM9IO		0x0020	/* timer 9 I/O mode */
+#define P1MD_2_XDMR1		0x0030	/* DMA request input 1 mode */
+#define P1MD_3			0x00c0
+#define P1MD_3_IN		0x0000
+#define P1MD_3_OUT		0x0040
+#define P1MD_3_TM10IO		0x0080	/* timer 10 I/O mode */
+#define P1MD_3_FRQS0		0x00c0	/* CPU clock multiplier setting input 0 mode */
+#define P1MD_4			0x0300
+#define P1MD_4_IN		0x0000
+#define P1MD_4_OUT		0x0100
+#define P1MD_4_TM11IO		0x0200	/* timer 11 I/O mode */
+#define P1MD_4_FRQS1		0x0300	/* CPU clock multiplier setting input 1 mode */
+
+#define	P1IN			__SYSREG(0xdb000104, u8)	/* in reg */
+#define	P1OUT			__SYSREG(0xdb000108, u8)	/* out reg */
+#define	P1TMIO			__SYSREG(0xdb00010c, u8)	/* TM pin I/O control reg */
+#define P1TMIO_TM11_IN		0x00
+#define P1TMIO_TM11_OUT		0x01
+#define P1TMIO_TM10_IN		0x00
+#define P1TMIO_TM10_OUT		0x02
+#define P1TMIO_TM9_IN		0x00
+#define P1TMIO_TM9_OUT		0x04
+#define P1TMIO_TM8_IN		0x00
+#define P1TMIO_TM8_OUT		0x08
+#define P1TMIO_TM7_IN		0x00
+#define P1TMIO_TM7_OUT		0x10
+
+/* I/O port 2 */
+#define	P2MD			__SYSREG(0xdb000200, u16)	/* mode reg */
+#define P2MD_0			0x0003	/* mask */
+#define P2MD_0_IN		0x0000	/* input mode */
+#define P2MD_0_OUT		0x0001	/* output mode */
+#define P2MD_0_BOOTBW		0x0003	/* boot bus width selector mode */
+#define P2MD_1			0x000c
+#define P2MD_1_IN		0x0000
+#define P2MD_1_OUT		0x0004
+#define P2MD_1_BOOTSEL		0x000c	/* boot device selector mode */
+#define P2MD_2			0x0030
+#define P2MD_2_IN		0x0000
+#define P2MD_2_OUT		0x0010
+#define P2MD_3			0x00c0
+#define P2MD_3_IN		0x0000
+#define P2MD_3_OUT		0x0040
+#define P2MD_3_CKIO		0x00c0	/* mode */
+#define P2MD_4			0x0300
+#define P2MD_4_IN		0x0000
+#define P2MD_4_OUT		0x0100
+#define P2MD_4_CMOD		0x0300	/* mode */
+
+#define	P2IN			__SYSREG(0xdb000204, u8)	/* in reg */
+#define	P2OUT			__SYSREG(0xdb000208, u8)	/* out reg */
+#define	P2TMIO			__SYSREG(0xdb00020c, u8)	/* TM pin I/O control reg */
+
+/* I/O port 3 */
+#define	P3MD			__SYSREG(0xdb000300, u16)	/* mode reg */
+#define P3MD_0			0x0003	/* mask */
+#define P3MD_0_IN		0x0000	/* input mode */
+#define P3MD_0_OUT		0x0001	/* output mode */
+#define P3MD_0_AFRXD		0x0002	/* AFR interface mode */
+#define P3MD_1			0x000c
+#define P3MD_1_IN		0x0000
+#define P3MD_1_OUT		0x0004
+#define P3MD_1_AFTXD		0x0008	/* AFR interface mode */
+#define P3MD_2			0x0030
+#define P3MD_2_IN		0x0000
+#define P3MD_2_OUT		0x0010
+#define P3MD_2_AFSCLK		0x0020	/* AFR interface mode */
+#define P3MD_3			0x00c0
+#define P3MD_3_IN		0x0000
+#define P3MD_3_OUT		0x0040
+#define P3MD_3_AFFS		0x0080	/* AFR interface mode */
+#define P3MD_4			0x0300
+#define P3MD_4_IN		0x0000
+#define P3MD_4_OUT		0x0100
+#define P3MD_4_AFEHC		0x0200	/* AFR interface mode */
+
+#define	P3IN			__SYSREG(0xdb000304, u8)	/* in reg */
+#define	P3OUT			__SYSREG(0xdb000308, u8)	/* out reg */
+
+/* I/O port 4 */
+#define	P4MD			__SYSREG(0xdb000400, u16)	/* mode reg */
+#define P4MD_0			0x0003	/* mask */
+#define P4MD_0_IN		0x0000	/* input mode */
+#define P4MD_0_OUT		0x0001	/* output mode */
+#define P4MD_0_SCL0		0x0002	/* I2C/serial mode */
+#define P4MD_1			0x000c
+#define P4MD_1_IN		0x0000
+#define P4MD_1_OUT		0x0004
+#define P4MD_1_SDA0		0x0008
+#define P4MD_2			0x0030
+#define P4MD_2_IN		0x0000
+#define P4MD_2_OUT		0x0010
+#define P4MD_2_SCL1		0x0020
+#define P4MD_3			0x00c0
+#define P4MD_3_IN		0x0000
+#define P4MD_3_OUT		0x0040
+#define P4MD_3_SDA1		0x0080
+#define P4MD_4			0x0300
+#define P4MD_4_IN		0x0000
+#define P4MD_4_OUT		0x0100
+#define P4MD_4_SBO0		0x0200
+#define P4MD_5			0x0c00
+#define P4MD_5_IN		0x0000
+#define P4MD_5_OUT		0x0400
+#define P4MD_5_SBO1		0x0800
+#define P4MD_6			0x3000
+#define P4MD_6_IN		0x0000
+#define P4MD_6_OUT		0x1000
+#define P4MD_6_SBT0		0x2000
+#define P4MD_7			0xc000
+#define P4MD_7_IN		0x0000
+#define P4MD_7_OUT		0x4000
+#define P4MD_7_SBT1		0x8000
+
+#define	P4IN			__SYSREG(0xdb000404, u8)	/* in reg */
+#define	P4OUT			__SYSREG(0xdb000408, u8)	/* out reg */
+
+/* I/O port 5 */
+#define	P5MD			__SYSREG(0xdb000500, u16)	/* mode reg */
+#define P5MD_0			0x0003	/* mask */
+#define P5MD_0_IN		0x0000	/* input mode */
+#define P5MD_0_OUT		0x0001	/* output mode */
+#define P5MD_0_IRTXD		0x0002	/* IrDA mode */
+#define P5MD_0_SOUT		0x0004	/* serial mode */
+#define P5MD_1			0x000c
+#define P5MD_1_IN		0x0000
+#define P5MD_1_OUT		0x0004
+#define P5MD_1_IRRXDS		0x0008	/* IrDA mode */
+#define P5MD_1_SIN		0x000c	/* serial mode */
+#define P5MD_2			0x0030
+#define P5MD_2_IN		0x0000
+#define P5MD_2_OUT		0x0010
+#define P5MD_2_IRRXDF		0x0020	/* IrDA mode */
+
+#define	P5IN			__SYSREG(0xdb000504, u8)	/* in reg */
+#define	P5OUT			__SYSREG(0xdb000508, u8)	/* out reg */
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_PIO_REGS_H */
diff --git a/include/asm-mn10300/poll.h b/include/asm-mn10300/poll.h
new file mode 100644
index 000000000000..c98509d3149e
--- /dev/null
+++ b/include/asm-mn10300/poll.h
@@ -0,0 +1 @@
+#include <asm-generic/poll.h>
diff --git a/include/asm-mn10300/posix_types.h b/include/asm-mn10300/posix_types.h
new file mode 100644
index 000000000000..077567c37798
--- /dev/null
+++ b/include/asm-mn10300/posix_types.h
@@ -0,0 +1,132 @@
+/* MN10300 POSIX types
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_POSIX_TYPES_H
+#define _ASM_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long	__kernel_ino_t;
+typedef unsigned short	__kernel_mode_t;
+typedef unsigned short	__kernel_nlink_t;
+typedef long		__kernel_off_t;
+typedef int		__kernel_pid_t;
+typedef unsigned short	__kernel_ipc_pid_t;
+typedef unsigned short	__kernel_uid_t;
+typedef unsigned short	__kernel_gid_t;
+typedef unsigned long	__kernel_size_t;
+typedef long		__kernel_ssize_t;
+typedef int		__kernel_ptrdiff_t;
+typedef long		__kernel_time_t;
+typedef long		__kernel_suseconds_t;
+typedef long		__kernel_clock_t;
+typedef int		__kernel_timer_t;
+typedef int		__kernel_clockid_t;
+typedef int		__kernel_daddr_t;
+typedef char *		__kernel_caddr_t;
+typedef unsigned short	__kernel_uid16_t;
+typedef unsigned short	__kernel_gid16_t;
+typedef unsigned int	__kernel_uid32_t;
+typedef unsigned int	__kernel_gid32_t;
+
+typedef unsigned short	__kernel_old_uid_t;
+typedef unsigned short	__kernel_old_gid_t;
+typedef unsigned short	__kernel_old_dev_t;
+
+#ifdef __GNUC__
+typedef long long	__kernel_loff_t;
+#endif
+
+typedef struct {
+#if defined(__KERNEL__) || defined(__USE_ALL)
+	int	val[2];
+#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+	int	__val[2];
+#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+} __kernel_fsid_t;
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+#undef	__FD_SET
+static inline void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
+}
+
+#undef	__FD_CLR
+static inline void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
+}
+
+
+#undef	__FD_ISSET
+static inline int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef	__FD_ZERO
+static inline void __FD_ZERO(__kernel_fd_set *__p)
+{
+	unsigned long *__tmp = __p->fds_bits;
+	int __i;
+
+	if (__builtin_constant_p(__FDSET_LONGS)) {
+		switch (__FDSET_LONGS) {
+		case 16:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			__tmp[ 8] = 0; __tmp[ 9] = 0;
+			__tmp[10] = 0; __tmp[11] = 0;
+			__tmp[12] = 0; __tmp[13] = 0;
+			__tmp[14] = 0; __tmp[15] = 0;
+			return;
+
+		case 8:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			return;
+
+		case 4:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			return;
+		}
+	}
+	__i = __FDSET_LONGS;
+	while (__i) {
+		__i--;
+		*__tmp = 0;
+		__tmp++;
+	}
+}
+
+#endif /* defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) */
+
+#endif /* _ASM_POSIX_TYPES_H */
diff --git a/include/asm-mn10300/proc-mn103e010/cache.h b/include/asm-mn10300/proc-mn103e010/cache.h
new file mode 100644
index 000000000000..bdc1f9a59b4c
--- /dev/null
+++ b/include/asm-mn10300/proc-mn103e010/cache.h
@@ -0,0 +1,33 @@
+/* MN103E010 Cache specification
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PROC_CACHE_H
+#define _ASM_PROC_CACHE_H
+
+/* L1 cache */
+
+#define L1_CACHE_NWAYS		4	/* number of ways in caches */
+#define L1_CACHE_NENTRIES	256	/* number of entries in each way */
+#define L1_CACHE_BYTES		16	/* bytes per entry */
+#define L1_CACHE_SHIFT		4	/* shift for bytes per entry */
+#define L1_CACHE_WAYDISP	0x1000	/* displacement of one way from the next */
+
+#define L1_CACHE_TAG_VALID	0x00000001	/* cache tag valid bit */
+#define L1_CACHE_TAG_DIRTY	0x00000008	/* data cache tag dirty bit */
+#define L1_CACHE_TAG_ENTRY	0x00000ff0	/* cache tag entry address mask */
+#define L1_CACHE_TAG_ADDRESS	0xfffff000	/* cache tag line address mask */
+
+/*
+ * specification of the interval between interrupt checking intervals whilst
+ * managing the cache with the interrupts disabled
+ */
+#define MN10300_DCACHE_INV_RANGE_INTR_LOG2_INTERVAL	4
+
+#endif /* _ASM_PROC_CACHE_H */
diff --git a/include/asm-mn10300/proc-mn103e010/clock.h b/include/asm-mn10300/proc-mn103e010/clock.h
new file mode 100644
index 000000000000..caf998350633
--- /dev/null
+++ b/include/asm-mn10300/proc-mn103e010/clock.h
@@ -0,0 +1,18 @@
+/* MN103E010-specific clocks
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PROC_CLOCK_H
+#define _ASM_PROC_CLOCK_H
+
+#include <asm/unit/clock.h>
+
+#define MN10300_WDCLK		MN10300_IOCLK
+
+#endif /* _ASM_PROC_CLOCK_H */
diff --git a/include/asm-mn10300/proc-mn103e010/irq.h b/include/asm-mn10300/proc-mn103e010/irq.h
new file mode 100644
index 000000000000..aa6ee8f98b1b
--- /dev/null
+++ b/include/asm-mn10300/proc-mn103e010/irq.h
@@ -0,0 +1,34 @@
+/* MN103E010 On-board interrupt controller numbers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_PROC_IRQ_H
+#define _ASM_PROC_IRQ_H
+
+#ifdef __KERNEL__
+
+#define GxICR_NUM_IRQS		42
+
+#define GxICR_NUM_XIRQS		8
+
+#define XIRQ0		34
+#define XIRQ1		35
+#define XIRQ2		36
+#define XIRQ3		37
+#define XIRQ4		38
+#define XIRQ5		39
+#define XIRQ6		40
+#define XIRQ7		41
+
+#define XIRQ2IRQ(num)	(XIRQ0 + num)
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_PROC_IRQ_H */
diff --git a/include/asm-mn10300/proc-mn103e010/proc.h b/include/asm-mn10300/proc-mn103e010/proc.h
new file mode 100644
index 000000000000..22a2b93f70b7
--- /dev/null
+++ b/include/asm-mn10300/proc-mn103e010/proc.h
@@ -0,0 +1,18 @@
+/* MN103E010 Processor description
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_PROC_PROC_H
+#define _ASM_PROC_PROC_H
+
+#define PROCESSOR_VENDOR_NAME	"Matsushita"
+#define PROCESSOR_MODEL_NAME	"mn103e010"
+
+#endif /* _ASM_PROC_PROC_H */
diff --git a/include/asm-mn10300/processor.h b/include/asm-mn10300/processor.h
new file mode 100644
index 000000000000..f1b081f53468
--- /dev/null
+++ b/include/asm-mn10300/processor.h
@@ -0,0 +1,186 @@
+/* MN10300 Processor specifics
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_PROCESSOR_H
+#define _ASM_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/cpu-regs.h>
+#include <linux/threads.h>
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr()			\
+({						\
+	void *__pc;				\
+	asm("mov pc,%0" : "=a"(__pc));		\
+	__pc;					\
+})
+
+extern void show_registers(struct pt_regs *regs);
+
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+
+struct mn10300_cpuinfo {
+	int		type;
+	unsigned long	loops_per_sec;
+	char		hard_math;
+	unsigned long	*pgd_quick;
+	unsigned long	*pte_quick;
+	unsigned long	pgtable_cache_sz;
+};
+
+extern struct mn10300_cpuinfo boot_cpu_data;
+
+#define cpu_data &boot_cpu_data
+#define current_cpu_data boot_cpu_data
+
+extern void identify_cpu(struct mn10300_cpuinfo *);
+extern void print_cpu_info(struct mn10300_cpuinfo *);
+extern void dodgy_tsc(void);
+#define cpu_relax() do {} while (0)
+
+/*
+ * User space process size: 1.75GB (default).
+ */
+#define TASK_SIZE		0x70000000
+
+/*
+ * Where to put the userspace stack by default
+ */
+#define STACK_TOP		0x70000000
+#define STACK_TOP_MAX		STACK_TOP
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	0x30000000
+
+typedef struct {
+	unsigned long	seg;
+} mm_segment_t;
+
+struct fpu_state_struct {
+	unsigned long	fs[32];		/* fpu registers */
+	unsigned long	fpcr;		/* fpu control register */
+};
+
+struct thread_struct {
+	struct pt_regs		*uregs;		/* userspace register frame */
+	unsigned long		pc;		/* kernel PC */
+	unsigned long		sp;		/* kernel SP */
+	unsigned long		a3;		/* kernel FP */
+	unsigned long		wchan;
+	unsigned long		usp;
+	struct pt_regs		*__frame;
+	unsigned long		fpu_flags;
+#define THREAD_USING_FPU	0x00000001	/* T if this task is using the FPU */
+	struct fpu_state_struct	fpu_state;
+};
+
+#define INIT_THREAD				\
+{						\
+	.uregs		= init_uregs,		\
+	.pc		= 0,			\
+	.sp		= 0,			\
+	.a3		= 0,			\
+	.wchan		= 0,			\
+	.__frame	= NULL,			\
+}
+
+#define INIT_MMAP \
+{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, \
+  NULL, NULL }
+
+/*
+ * do necessary setup to start up a newly executed thread
+ * - need to discard the frame stacked by the kernel thread invoking the execve
+ *   syscall (see RESTORE_ALL macro)
+ */
+#define start_thread(regs, new_pc, new_sp) do {		\
+	set_fs(USER_DS);				\
+	__frame = current->thread.uregs;		\
+	__frame->epsw = EPSW_nSL | EPSW_IE | EPSW_IM;	\
+	__frame->pc = new_pc;				\
+	__frame->sp = new_sp;				\
+} while (0)
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+extern void prepare_to_copy(struct task_struct *tsk);
+
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
+unsigned long get_wchan(struct task_struct *p);
+
+#define task_pt_regs(task)						\
+({									\
+       struct pt_regs *__regs__;					\
+       __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \
+       __regs__ - 1;							\
+})
+
+#define KSTK_EIP(task) (task_pt_regs(task)->pc)
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
+
+#define KSTK_TOP(info)				\
+({						\
+	(unsigned long)(info) + THREAD_SIZE;	\
+})
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+
+static inline void prefetch(const void *x)
+{
+#ifndef CONFIG_MN10300_CACHE_DISABLED
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	asm volatile ("nop; nop; dcpf (%0)" : : "r"(x));
+#else
+	asm volatile ("dcpf (%0)" : : "r"(x));
+#endif
+#endif
+}
+
+static inline void prefetchw(const void *x)
+{
+#ifndef CONFIG_MN10300_CACHE_DISABLED
+#ifdef CONFIG_MN10300_PROC_MN103E010
+	asm volatile ("nop; nop; dcpf (%0)" : : "r"(x));
+#else
+	asm volatile ("dcpf (%0)" : : "r"(x));
+#endif
+#endif
+}
+
+#endif /* _ASM_PROCESSOR_H */
diff --git a/include/asm-mn10300/ptrace.h b/include/asm-mn10300/ptrace.h
new file mode 100644
index 000000000000..b3684689fcce
--- /dev/null
+++ b/include/asm-mn10300/ptrace.h
@@ -0,0 +1,99 @@
+/* MN10300 Exception frame layout and ptrace constants
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_PTRACE_H
+#define _ASM_PTRACE_H
+
+#define PT_A3		0
+#define PT_A2		1
+#define PT_D3		2
+#define	PT_D2		3
+#define PT_MCVF		4
+#define	PT_MCRL		5
+#define PT_MCRH		6
+#define	PT_MDRQ		7
+#define	PT_E1		8
+#define	PT_E0		9
+#define	PT_E7		10
+#define	PT_E6		11
+#define	PT_E5		12
+#define	PT_E4		13
+#define	PT_E3		14
+#define	PT_E2		15
+#define	PT_SP		16
+#define	PT_LAR		17
+#define	PT_LIR		18
+#define	PT_MDR		19
+#define	PT_A1		20
+#define	PT_A0		21
+#define	PT_D1		22
+#define	PT_D0		23
+#define PT_ORIG_D0	24
+#define	PT_EPSW		25
+#define	PT_PC		26
+#define NR_PTREGS	27
+
+#ifndef __ASSEMBLY__
+/*
+ * This defines the way registers are stored in the event of an exception
+ * - the strange order is due to the MOVM instruction
+ */
+struct pt_regs {
+	unsigned long		a3;		/* syscall arg 3 */
+	unsigned long		a2;		/* syscall arg 4 */
+	unsigned long		d3;		/* syscall arg 5 */
+	unsigned long		d2;		/* syscall arg 6 */
+	unsigned long		mcvf;
+	unsigned long		mcrl;
+	unsigned long		mcrh;
+	unsigned long		mdrq;
+	unsigned long		e1;
+	unsigned long		e0;
+	unsigned long		e7;
+	unsigned long		e6;
+	unsigned long		e5;
+	unsigned long		e4;
+	unsigned long		e3;
+	unsigned long		e2;
+	unsigned long		sp;
+	unsigned long		lar;
+	unsigned long		lir;
+	unsigned long		mdr;
+	unsigned long		a1;
+	unsigned long		a0;		/* syscall arg 1 */
+	unsigned long		d1;		/* syscall arg 2 */
+	unsigned long		d0;		/* syscall ret */
+	struct pt_regs		*next;		/* next frame pointer */
+	unsigned long		orig_d0;	/* syscall number */
+	unsigned long		epsw;
+	unsigned long		pc;
+};
+#endif
+
+extern struct pt_regs *__frame; /* current frame pointer */
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13
+#define PTRACE_GETFPREGS          14
+#define PTRACE_SETFPREGS          15
+
+/* options set using PTRACE_SETOPTIONS */
+#define PTRACE_O_TRACESYSGOOD     0x00000001
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#define user_mode(regs)			(((regs)->epsw & EPSW_nSL) == EPSW_nSL)
+#define instruction_pointer(regs)	((regs)->pc)
+extern void show_regs(struct pt_regs *);
+#endif
+
+#define profile_pc(regs) ((regs)->pc)
+
+#endif /* _ASM_PTRACE_H */
diff --git a/include/asm-mn10300/reset-regs.h b/include/asm-mn10300/reset-regs.h
new file mode 100644
index 000000000000..174523d50132
--- /dev/null
+++ b/include/asm-mn10300/reset-regs.h
@@ -0,0 +1,64 @@
+/* MN10300 Reset controller and watchdog timer definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_RESET_REGS_H
+#define _ASM_RESET_REGS_H
+
+#include <asm/cpu-regs.h>
+#include <asm/exceptions.h>
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_MN10300_WD_TIMER
+#define ARCH_HAS_NMI_WATCHDOG		/* See include/linux/nmi.h */
+#endif
+
+/*
+ * watchdog timer registers
+ */
+#define WDBC			__SYSREGC(0xc0001000, u8) /* watchdog binary counter reg */
+
+#define WDCTR			__SYSREG(0xc0001002, u8)  /* watchdog timer control reg */
+#define WDCTR_WDCK		0x07	/* clock source selection */
+#define WDCTR_WDCK_256th	0x00	/* - OSCI/256 */
+#define WDCTR_WDCK_1024th	0x01	/* - OSCI/1024 */
+#define WDCTR_WDCK_2048th	0x02	/* - OSCI/2048 */
+#define WDCTR_WDCK_16384th	0x03	/* - OSCI/16384 */
+#define WDCTR_WDCK_65536th	0x04	/* - OSCI/65536 */
+#define WDCTR_WDRST		0x40	/* binary counter reset */
+#define WDCTR_WDCNE		0x80	/* watchdog timer enable */
+
+#define RSTCTR			__SYSREG(0xc0001004, u8) /* reset control reg */
+#define RSTCTR_CHIPRST		0x01	/* chip reset */
+#define RSTCTR_DBFRST		0x02	/* double fault reset flag */
+#define RSTCTR_WDTRST		0x04	/* watchdog timer reset flag */
+#define RSTCTR_WDREN		0x08	/* watchdog timer reset enable */
+
+#ifndef __ASSEMBLY__
+
+static inline void mn10300_proc_hard_reset(void)
+{
+	RSTCTR &= ~RSTCTR_CHIPRST;
+	RSTCTR |= RSTCTR_CHIPRST;
+}
+
+extern unsigned int watchdog_alert_counter;
+
+extern void watchdog_go(void);
+extern asmlinkage void watchdog_handler(void);
+extern asmlinkage
+void watchdog_interrupt(struct pt_regs *, enum exception_code);
+
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_RESET_REGS_H */
diff --git a/include/asm-mn10300/resource.h b/include/asm-mn10300/resource.h
new file mode 100644
index 000000000000..04bc4db8921b
--- /dev/null
+++ b/include/asm-mn10300/resource.h
@@ -0,0 +1 @@
+#include <asm-generic/resource.h>
diff --git a/include/asm-mn10300/rtc-regs.h b/include/asm-mn10300/rtc-regs.h
new file mode 100644
index 000000000000..c42deefaec11
--- /dev/null
+++ b/include/asm-mn10300/rtc-regs.h
@@ -0,0 +1,86 @@
+/* MN10300 on-chip Real-Time Clock registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_RTC_REGS_H
+#define _ASM_RTC_REGS_H
+
+#include <asm/intctl-regs.h>
+
+#ifdef __KERNEL__
+
+#define RTSCR			__SYSREG(0xd8600000, u8) /* RTC seconds count reg */
+#define RTSAR			__SYSREG(0xd8600001, u8) /* RTC seconds alarm reg */
+#define RTMCR			__SYSREG(0xd8600002, u8) /* RTC minutes count reg */
+#define RTMAR			__SYSREG(0xd8600003, u8) /* RTC minutes alarm reg */
+#define RTHCR			__SYSREG(0xd8600004, u8) /* RTC hours count reg */
+#define RTHAR			__SYSREG(0xd8600005, u8) /* RTC hours alarm reg */
+#define RTDWCR			__SYSREG(0xd8600006, u8) /* RTC day of the week count reg */
+#define RTDMCR			__SYSREG(0xd8600007, u8) /* RTC days count reg */
+#define RTMTCR			__SYSREG(0xd8600008, u8) /* RTC months count reg */
+#define RTYCR			__SYSREG(0xd8600009, u8) /* RTC years count reg */
+
+#define RTCRA			__SYSREG(0xd860000a, u8)/* RTC control reg A */
+#define RTCRA_RS		0x0f	/* periodic timer interrupt cycle setting */
+#define RTCRA_RS_NONE		0x00	/* - off */
+#define RTCRA_RS_3_90625ms	0x01	/* - 3.90625ms	(1/256s) */
+#define RTCRA_RS_7_8125ms	0x02	/* - 7.8125ms	(1/128s) */
+#define RTCRA_RS_122_070us	0x03	/* - 122.070us	(1/8192s) */
+#define RTCRA_RS_244_141us	0x04	/* - 244.141us	(1/4096s) */
+#define RTCRA_RS_488_281us	0x05	/* - 488.281us	(1/2048s) */
+#define RTCRA_RS_976_5625us	0x06	/* - 976.5625us	(1/1024s) */
+#define RTCRA_RS_1_953125ms	0x07	/* - 1.953125ms	(1/512s) */
+#define RTCRA_RS_3_90624ms	0x08	/* - 3.90624ms	(1/256s) */
+#define RTCRA_RS_7_8125ms_b	0x09	/* - 7.8125ms	(1/128s) */
+#define RTCRA_RS_15_625ms	0x0a	/* - 15.625ms	(1/64s) */
+#define RTCRA_RS_31_25ms	0x0b	/* - 31.25ms	(1/32s) */
+#define RTCRA_RS_62_5ms		0x0c	/* - 62.5ms	(1/16s) */
+#define RTCRA_RS_125ms		0x0d	/* - 125ms	(1/8s) */
+#define RTCRA_RS_250ms		0x0e	/* - 250ms	(1/4s) */
+#define RTCRA_RS_500ms		0x0f	/* - 500ms	(1/2s) */
+#define RTCRA_DVR		0x40	/* divider reset */
+#define RTCRA_UIP		0x80	/* clock update flag */
+
+#define RTCRB			__SYSREG(0xd860000b, u8) /* RTC control reg B */
+#define RTCRB_DSE		0x01	/* daylight savings time enable */
+#define RTCRB_TM		0x02	/* time format */
+#define RTCRB_TM_12HR		0x00	/* - 12 hour format */
+#define RTCRB_TM_24HR		0x02	/* - 24 hour format */
+#define RTCRB_DM		0x04	/* numeric value format */
+#define RTCRB_DM_BCD		0x00	/* - BCD */
+#define RTCRB_DM_BINARY		0x04	/* - binary */
+#define RTCRB_UIE		0x10	/* update interrupt disable */
+#define RTCRB_AIE		0x20	/* alarm interrupt disable */
+#define RTCRB_PIE		0x40	/* periodic interrupt disable */
+#define RTCRB_SET		0x80	/* clock update enable */
+
+#define RTSRC			__SYSREG(0xd860000c, u8) /* RTC status reg C */
+#define RTSRC_UF		0x10	/* update end interrupt flag */
+#define RTSRC_AF		0x20	/* alarm interrupt flag */
+#define RTSRC_PF		0x40	/* periodic interrupt flag */
+#define RTSRC_IRQF		0x80	/* interrupt flag */
+
+#define RTIRQ			32
+#define RTICR			GxICR(RTIRQ)
+
+/*
+ * MC146818 RTC compatibility defs for the MN10300 on-chip RTC
+ */
+#define RTC_PORT(x)		0xd8600000
+#define RTC_ALWAYS_BCD		1	/* RTC operates in binary mode */
+
+#define CMOS_READ(addr)		__SYSREG(0xd8600000 + (addr), u8)
+#define CMOS_WRITE(val, addr)	\
+	do { __SYSREG(0xd8600000 + (addr), u8) = val; } while (0)
+
+#define RTC_IRQ			RTIRQ
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_RTC_REGS_H */
diff --git a/include/asm-mn10300/rtc.h b/include/asm-mn10300/rtc.h
new file mode 100644
index 000000000000..c295194cc703
--- /dev/null
+++ b/include/asm-mn10300/rtc.h
@@ -0,0 +1,41 @@
+/* MN10300 Real time clock definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_RTC_H
+#define _ASM_RTC_H
+
+#ifdef CONFIG_MN10300_RTC
+
+#include <linux/init.h>
+
+extern void check_rtc_time(void);
+extern void __init calibrate_clock(void);
+extern unsigned long __init get_initial_rtc_time(void);
+
+#else /* !CONFIG_MN10300_RTC */
+
+static inline void check_rtc_time(void)
+{
+}
+
+static inline void calibrate_clock(void)
+{
+}
+
+static inline unsigned long get_initial_rtc_time(void)
+{
+	return 0;
+}
+
+#endif /* !CONFIG_MN10300_RTC */
+
+#include <asm-generic/rtc.h>
+
+#endif /* _ASM_RTC_H */
diff --git a/include/asm-mn10300/scatterlist.h b/include/asm-mn10300/scatterlist.h
new file mode 100644
index 000000000000..e29d91dbcf2b
--- /dev/null
+++ b/include/asm-mn10300/scatterlist.h
@@ -0,0 +1,46 @@
+/* MN10300 Scatterlist definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SCATTERLIST_H
+#define _ASM_SCATTERLIST_H
+
+#include <asm/types.h>
+
+/*
+ * Drivers must set either ->address or (preferred) page and ->offset
+ * to indicate where data must be transferred to/from.
+ *
+ * Using page is recommended since it handles highmem data as well as
+ * low mem. ->address is restricted to data which has a virtual mapping, and
+ * it will go away in the future. Updating to page can be automated very
+ * easily -- something like
+ *
+ * sg->address = some_ptr;
+ *
+ * can be rewritten as
+ *
+ * sg_set_page(virt_to_page(some_ptr));
+ * sg->offset = (unsigned long) some_ptr & ~PAGE_MASK;
+ *
+ * and that's it. There's no excuse for not highmem enabling YOUR driver. /jens
+ */
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+	unsigned long	sg_magic;
+#endif
+	unsigned long	page_link;
+	unsigned int	offset;		/* for highmem, page offset */
+	dma_addr_t	dma_address;
+	unsigned int	length;
+};
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* _ASM_SCATTERLIST_H */
diff --git a/include/asm-mn10300/sections.h b/include/asm-mn10300/sections.h
new file mode 100644
index 000000000000..2b8c5160388f
--- /dev/null
+++ b/include/asm-mn10300/sections.h
@@ -0,0 +1 @@
+#include <asm-generic/sections.h>
diff --git a/include/asm-mn10300/semaphore.h b/include/asm-mn10300/semaphore.h
new file mode 100644
index 000000000000..5a9e1ad0b253
--- /dev/null
+++ b/include/asm-mn10300/semaphore.h
@@ -0,0 +1,169 @@
+/* MN10300 Semaphores
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SEMAPHORE_H
+#define _ASM_SEMAPHORE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+
+#define SEMAPHORE_DEBUG		0
+
+/*
+ * the semaphore definition
+ * - if count is >0 then there are tokens available on the semaphore for down
+ *   to collect
+ * - if count is <=0 then there are no spare tokens, and anyone that wants one
+ *   must wait
+ * - if wait_list is not empty, then there are processes waiting for the
+ *   semaphore
+ */
+struct semaphore {
+	atomic_t		count;		/* it's not really atomic, it's
+						 * just that certain modules
+						 * expect to be able to access
+						 * it directly */
+	spinlock_t		wait_lock;
+	struct list_head	wait_list;
+#if SEMAPHORE_DEBUG
+	unsigned		__magic;
+#endif
+};
+
+#if SEMAPHORE_DEBUG
+# define __SEM_DEBUG_INIT(name) , (long)&(name).__magic
+#else
+# define __SEM_DEBUG_INIT(name)
+#endif
+
+
+#define __SEMAPHORE_INITIALIZER(name, init_count)			\
+{									\
+	.count		= ATOMIC_INIT(init_count),			\
+	.wait_lock	= __SPIN_LOCK_UNLOCKED((name).wait_lock),	\
+	.wait_list	= LIST_HEAD_INIT((name).wait_list)		\
+	__SEM_DEBUG_INIT(name)						\
+}
+
+#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
+	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
+
+#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name, 1)
+#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name, 0)
+
+static inline void sema_init(struct semaphore *sem, int val)
+{
+	*sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
+}
+
+static inline void init_MUTEX(struct semaphore *sem)
+{
+	sema_init(sem, 1);
+}
+
+static inline void init_MUTEX_LOCKED(struct semaphore *sem)
+{
+	sema_init(sem, 0);
+}
+
+extern void __down(struct semaphore *sem, unsigned long flags);
+extern int  __down_interruptible(struct semaphore *sem, unsigned long flags);
+extern void __up(struct semaphore *sem);
+
+static inline void down(struct semaphore *sem)
+{
+	unsigned long flags;
+	int count;
+
+#if SEMAPHORE_DEBUG
+	CHECK_MAGIC(sem->__magic);
+#endif
+
+	spin_lock_irqsave(&sem->wait_lock, flags);
+	count = atomic_read(&sem->count);
+	if (likely(count > 0)) {
+		atomic_set(&sem->count, count - 1);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
+	} else {
+		__down(sem, flags);
+	}
+}
+
+static inline int down_interruptible(struct semaphore *sem)
+{
+	unsigned long flags;
+	int count, ret = 0;
+
+#if SEMAPHORE_DEBUG
+	CHECK_MAGIC(sem->__magic);
+#endif
+
+	spin_lock_irqsave(&sem->wait_lock, flags);
+	count = atomic_read(&sem->count);
+	if (likely(count > 0)) {
+		atomic_set(&sem->count, count - 1);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
+	} else {
+		ret = __down_interruptible(sem, flags);
+	}
+	return ret;
+}
+
+/*
+ * non-blockingly attempt to down() a semaphore.
+ * - returns zero if we acquired it
+ */
+static inline int down_trylock(struct semaphore *sem)
+{
+	unsigned long flags;
+	int count, success = 0;
+
+#if SEMAPHORE_DEBUG
+	CHECK_MAGIC(sem->__magic);
+#endif
+
+	spin_lock_irqsave(&sem->wait_lock, flags);
+	count = atomic_read(&sem->count);
+	if (likely(count > 0)) {
+		atomic_set(&sem->count, count - 1);
+		success = 1;
+	}
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
+	return !success;
+}
+
+static inline void up(struct semaphore *sem)
+{
+	unsigned long flags;
+
+#if SEMAPHORE_DEBUG
+	CHECK_MAGIC(sem->__magic);
+#endif
+
+	spin_lock_irqsave(&sem->wait_lock, flags);
+	if (!list_empty(&sem->wait_list))
+		__up(sem);
+	else
+		atomic_set(&sem->count, atomic_read(&sem->count) + 1);
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
+}
+
+static inline int sem_getcount(struct semaphore *sem)
+{
+	return atomic_read(&sem->count);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/include/asm-mn10300/sembuf.h b/include/asm-mn10300/sembuf.h
new file mode 100644
index 000000000000..301f3f9d8aa9
--- /dev/null
+++ b/include/asm-mn10300/sembuf.h
@@ -0,0 +1,25 @@
+#ifndef _ASM_SEMBUF_H
+#define _ASM_SEMBUF_H
+
+/*
+ * The semid64_ds structure for MN10300 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+	__kernel_time_t	sem_otime;		/* last semop time */
+	unsigned long	__unused1;
+	__kernel_time_t	sem_ctime;		/* last change time */
+	unsigned long	__unused2;
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* _ASM_SEMBUF_H */
diff --git a/include/asm-mn10300/serial-regs.h b/include/asm-mn10300/serial-regs.h
new file mode 100644
index 000000000000..6498469e93ac
--- /dev/null
+++ b/include/asm-mn10300/serial-regs.h
@@ -0,0 +1,160 @@
+/* MN10300 on-board serial port module registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_SERIAL_REGS_H
+#define _ASM_SERIAL_REGS_H
+
+#include <asm/cpu-regs.h>
+#include <asm/intctl-regs.h>
+
+#ifdef __KERNEL__
+
+/* serial port 0 */
+#define	SC0CTR			__SYSREG(0xd4002000, u16)	/* control reg */
+#define	SC01CTR_CK		0x0007	/* clock source select */
+#define	SC0CTR_CK_TM8UFLOW_8	0x0000	/* - 1/8 timer 8 underflow (serial port 0 only) */
+#define	SC1CTR_CK_TM9UFLOW_8	0x0000	/* - 1/8 timer 9 underflow (serial port 1 only) */
+#define	SC01CTR_CK_IOCLK_8	0x0001	/* - 1/8 IOCLK */
+#define	SC01CTR_CK_IOCLK_32	0x0002	/* - 1/32 IOCLK */
+#define	SC0CTR_CK_TM2UFLOW_2	0x0003	/* - 1/2 timer 2 underflow (serial port 0 only) */
+#define	SC1CTR_CK_TM3UFLOW_2	0x0003	/* - 1/2 timer 3 underflow (serial port 1 only) */
+#define	SC0CTR_CK_TM0UFLOW_8	0x0004	/* - 1/8 timer 1 underflow (serial port 0 only) */
+#define	SC1CTR_CK_TM1UFLOW_8	0x0004	/* - 1/8 timer 2 underflow (serial port 1 only) */
+#define	SC0CTR_CK_TM2UFLOW_8	0x0005	/* - 1/8 timer 2 underflow (serial port 0 only) */
+#define	SC1CTR_CK_TM3UFLOW_8	0x0005	/* - 1/8 timer 3 underflow (serial port 1 only) */
+#define	SC01CTR_CK_EXTERN_8	0x0006	/* - 1/8 external closk */
+#define	SC01CTR_CK_EXTERN	0x0007	/* - external closk */
+#define	SC01CTR_STB		0x0008	/* stop bit select */
+#define	SC01CTR_STB_1BIT	0x0000	/* - 1 stop bit */
+#define	SC01CTR_STB_2BIT	0x0008	/* - 2 stop bits */
+#define	SC01CTR_PB		0x0070	/* parity bit select */
+#define	SC01CTR_PB_NONE		0x0000	/* - no parity */
+#define	SC01CTR_PB_FIXED0	0x0040	/* - fixed at 0 */
+#define	SC01CTR_PB_FIXED1	0x0050	/* - fixed at 1 */
+#define	SC01CTR_PB_EVEN		0x0060	/* - even parity */
+#define	SC01CTR_PB_ODD		0x0070	/* - odd parity */
+#define	SC01CTR_CLN		0x0080	/* character length */
+#define	SC01CTR_CLN_7BIT	0x0000	/* - 7 bit chars */
+#define	SC01CTR_CLN_8BIT	0x0080	/* - 8 bit chars */
+#define	SC01CTR_TOE		0x0100	/* T input output enable */
+#define	SC01CTR_OD		0x0200	/* bit order select */
+#define	SC01CTR_OD_LSBFIRST	0x0000	/* - LSB first */
+#define	SC01CTR_OD_MSBFIRST	0x0200	/* - MSB first */
+#define	SC01CTR_MD		0x0c00	/* mode select */
+#define SC01CTR_MD_STST_SYNC	0x0000	/* - start-stop synchronous */
+#define SC01CTR_MD_CLOCK_SYNC1	0x0400	/* - clock synchronous 1 */
+#define SC01CTR_MD_I2C		0x0800	/* - I2C mode */
+#define SC01CTR_MD_CLOCK_SYNC2	0x0c00	/* - clock synchronous 2 */
+#define	SC01CTR_IIC		0x1000	/* I2C mode select */
+#define	SC01CTR_BKE		0x2000	/* break transmit enable */
+#define	SC01CTR_RXE		0x4000	/* receive enable */
+#define	SC01CTR_TXE		0x8000	/* transmit enable */
+
+#define	SC0ICR			__SYSREG(0xd4002004, u8)	/* interrupt control reg */
+#define SC01ICR_DMD		0x80	/* output data mode */
+#define SC01ICR_TD		0x20	/* transmit DMA trigger cause */
+#define SC01ICR_TI		0x10	/* transmit interrupt cause */
+#define SC01ICR_RES		0x04	/* receive error select */
+#define SC01ICR_RI		0x01	/* receive interrupt cause */
+
+#define	SC0TXB			__SYSREG(0xd4002008, u8)	/* transmit buffer reg */
+#define	SC0RXB			__SYSREG(0xd4002009, u8)	/* receive buffer reg */
+
+#define	SC0STR			__SYSREG(0xd400200c, u16)	/* status reg */
+#define SC01STR_OEF		0x0001	/* overrun error found */
+#define SC01STR_PEF		0x0002	/* parity error found */
+#define SC01STR_FEF		0x0004	/* framing error found */
+#define SC01STR_RBF		0x0010	/* receive buffer status */
+#define SC01STR_TBF		0x0020	/* transmit buffer status */
+#define SC01STR_RXF		0x0040	/* receive status */
+#define SC01STR_TXF		0x0080	/* transmit status */
+#define SC01STR_STF		0x0100	/* I2C start sequence found */
+#define SC01STR_SPF		0x0200	/* I2C stop sequence found */
+
+#define SC0RXIRQ		20	/* timer 0 Receive IRQ */
+#define SC0TXIRQ		21	/* timer 0 Transmit IRQ */
+
+#define	SC0RXICR		GxICR(SC0RXIRQ)	/* serial 0 receive intr ctrl reg */
+#define	SC0TXICR		GxICR(SC0TXIRQ)	/* serial 0 transmit intr ctrl reg */
+
+/* serial port 1 */
+#define	SC1CTR			__SYSREG(0xd4002010, u16)	/* serial port 1 control */
+#define	SC1ICR			__SYSREG(0xd4002014, u8)	/* interrupt control reg */
+#define	SC1TXB			__SYSREG(0xd4002018, u8)	/* transmit buffer reg */
+#define	SC1RXB			__SYSREG(0xd4002019, u8)	/* receive buffer reg */
+#define	SC1STR			__SYSREG(0xd400201c, u16)	/* status reg */
+
+#define SC1RXIRQ		22	/* timer 1 Receive IRQ */
+#define SC1TXIRQ		23	/* timer 1 Transmit IRQ */
+
+#define	SC1RXICR		GxICR(SC1RXIRQ)	/* serial 1 receive intr ctrl reg */
+#define	SC1TXICR		GxICR(SC1TXIRQ)	/* serial 1 transmit intr ctrl reg */
+
+/* serial port 2 */
+#define	SC2CTR			__SYSREG(0xd4002020, u16)	/* control reg */
+#define	SC2CTR_CK		0x0003	/* clock source select */
+#define	SC2CTR_CK_TM10UFLOW	0x0000	/* - timer 10 underflow */
+#define	SC2CTR_CK_TM2UFLOW	0x0001	/* - timer 2 underflow */
+#define	SC2CTR_CK_EXTERN	0x0002	/* - external closk */
+#define	SC2CTR_CK_TM3UFLOW	0x0003	/* - timer 3 underflow */
+#define	SC2CTR_STB		0x0008	/* stop bit select */
+#define	SC2CTR_STB_1BIT		0x0000	/* - 1 stop bit */
+#define	SC2CTR_STB_2BIT		0x0008	/* - 2 stop bits */
+#define	SC2CTR_PB		0x0070	/* parity bit select */
+#define	SC2CTR_PB_NONE		0x0000	/* - no parity */
+#define	SC2CTR_PB_FIXED0	0x0040	/* - fixed at 0 */
+#define	SC2CTR_PB_FIXED1	0x0050	/* - fixed at 1 */
+#define	SC2CTR_PB_EVEN		0x0060	/* - even parity */
+#define	SC2CTR_PB_ODD		0x0070	/* - odd parity */
+#define	SC2CTR_CLN		0x0080	/* character length */
+#define	SC2CTR_CLN_7BIT		0x0000	/* - 7 bit chars */
+#define	SC2CTR_CLN_8BIT		0x0080	/* - 8 bit chars */
+#define	SC2CTR_TWE		0x0100	/* transmit wait enable (enable XCTS control) */
+#define	SC2CTR_OD		0x0200	/* bit order select */
+#define	SC2CTR_OD_LSBFIRST	0x0000	/* - LSB first */
+#define	SC2CTR_OD_MSBFIRST	0x0200	/* - MSB first */
+#define	SC2CTR_TWS		0x1000	/* transmit wait select */
+#define	SC2CTR_TWS_XCTS_HIGH	0x0000	/* - interrupt TX when XCTS high */
+#define	SC2CTR_TWS_XCTS_LOW	0x1000	/* - interrupt TX when XCTS low */
+#define	SC2CTR_BKE		0x2000	/* break transmit enable */
+#define	SC2CTR_RXE		0x4000	/* receive enable */
+#define	SC2CTR_TXE		0x8000	/* transmit enable */
+
+#define	SC2ICR			__SYSREG(0xd4002024, u8)	/* interrupt control reg */
+#define SC2ICR_TD		0x20	/* transmit DMA trigger cause */
+#define SC2ICR_TI		0x10	/* transmit interrupt cause */
+#define SC2ICR_RES		0x04	/* receive error select */
+#define SC2ICR_RI		0x01	/* receive interrupt cause */
+
+#define	SC2TXB			__SYSREG(0xd4002018, u8)	/* transmit buffer reg */
+#define	SC2RXB			__SYSREG(0xd4002019, u8)	/* receive buffer reg */
+#define	SC2STR			__SYSREG(0xd400201c, u8)	/* status reg */
+#define SC2STR_OEF		0x0001	/* overrun error found */
+#define SC2STR_PEF		0x0002	/* parity error found */
+#define SC2STR_FEF		0x0004	/* framing error found */
+#define SC2STR_CTS		0x0008	/* XCTS input pin status (0 means high) */
+#define SC2STR_RBF		0x0010	/* receive buffer status */
+#define SC2STR_TBF		0x0020	/* transmit buffer status */
+#define SC2STR_RXF		0x0040	/* receive status */
+#define SC2STR_TXF		0x0080	/* transmit status */
+
+#define	SC2TIM			__SYSREG(0xd400202d, u8)	/* status reg */
+
+#define SC2RXIRQ		24	/* serial 2 Receive IRQ */
+#define SC2TXIRQ		25	/* serial 2 Transmit IRQ */
+
+#define	SC2RXICR		GxICR(SC2RXIRQ)	/* serial 2 receive intr ctrl reg */
+#define	SC2TXICR		GxICR(SC2TXIRQ)	/* serial 2 transmit intr ctrl reg */
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_SERIAL_REGS_H */
diff --git a/include/asm-mn10300/serial.h b/include/asm-mn10300/serial.h
new file mode 100644
index 000000000000..99785a9deadb
--- /dev/null
+++ b/include/asm-mn10300/serial.h
@@ -0,0 +1,36 @@
+/* Standard UART definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+/*
+ * The ASB2305 has an 18.432 MHz clock the UART
+ */
+#define BASE_BAUD	(18432000 / 16)
+
+/* Standard COM flags (except for COM4, because of the 8514 problem) */
+#ifdef CONFIG_SERIAL_DETECT_IRQ
+#define STD_COM_FLAGS	(ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
+#define STD_COM4_FLAGS	(ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ)
+#else
+#define STD_COM_FLAGS	(ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
+#define STD_COM4_FLAGS	ASYNC_BOOT_AUTOCONF
+#endif
+
+#ifdef CONFIG_SERIAL_MANY_PORTS
+#define FOURPORT_FLAGS	ASYNC_FOURPORT
+#define ACCENT_FLAGS	0
+#define BOCA_FLAGS	0
+#define HUB6_FLAGS	0
+#define RS_TABLE_SIZE	64
+#else
+#define RS_TABLE_SIZE
+#endif
+
+#include <asm/unit/serial.h>
diff --git a/include/asm-mn10300/setup.h b/include/asm-mn10300/setup.h
new file mode 100644
index 000000000000..08356c832283
--- /dev/null
+++ b/include/asm-mn10300/setup.h
@@ -0,0 +1,17 @@
+/* MN10300 Setup declarations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SETUP_H
+#define _ASM_SETUP_H
+
+extern void __init unit_setup(void);
+extern void __init unit_init_IRQ(void);
+
+#endif /* _ASM_SETUP_H */
diff --git a/include/asm-mn10300/shmbuf.h b/include/asm-mn10300/shmbuf.h
new file mode 100644
index 000000000000..8f300cc35d6c
--- /dev/null
+++ b/include/asm-mn10300/shmbuf.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_SHMBUF_H
+#define _ASM_SHMBUF_H
+
+/*
+ * The shmid64_ds structure for MN10300 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_time_t		shm_atime;	/* last attach time */
+	unsigned long		__unused1;
+	__kernel_time_t		shm_dtime;	/* last detach time */
+	unsigned long		__unused2;
+	__kernel_time_t		shm_ctime;	/* last change time */
+	unsigned long		__unused3;
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* _ASM_SHMBUF_H */
diff --git a/include/asm-mn10300/shmparam.h b/include/asm-mn10300/shmparam.h
new file mode 100644
index 000000000000..ab666ed1a070
--- /dev/null
+++ b/include/asm-mn10300/shmparam.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_SHMPARAM_H
+#define _ASM_SHMPARAM_H
+
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
+
+#endif /* _ASM_SHMPARAM_H */
diff --git a/include/asm-mn10300/sigcontext.h b/include/asm-mn10300/sigcontext.h
new file mode 100644
index 000000000000..4de3afff4ad7
--- /dev/null
+++ b/include/asm-mn10300/sigcontext.h
@@ -0,0 +1,52 @@
+/* MN10300 Userspace signal context
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SIGCONTEXT_H
+#define _ASM_SIGCONTEXT_H
+
+struct fpucontext {
+	/* Regular FPU environment */
+	unsigned long	fs[32];		/* fpu registers */
+	unsigned long	fpcr;		/* fpu control register */
+};
+
+struct sigcontext {
+	unsigned long	d0;
+	unsigned long	d1;
+	unsigned long	d2;
+	unsigned long	d3;
+	unsigned long	a0;
+	unsigned long	a1;
+	unsigned long	a2;
+	unsigned long	a3;
+	unsigned long	e0;
+	unsigned long	e1;
+	unsigned long	e2;
+	unsigned long	e3;
+	unsigned long	e4;
+	unsigned long	e5;
+	unsigned long	e6;
+	unsigned long	e7;
+	unsigned long	lar;
+	unsigned long	lir;
+	unsigned long	mdr;
+	unsigned long	mcvf;
+	unsigned long	mcrl;
+	unsigned long	mcrh;
+	unsigned long	mdrq;
+	unsigned long	sp;
+	unsigned long	epsw;
+	unsigned long	pc;
+	struct fpucontext *fpucontext;
+	unsigned long	oldmask;
+};
+
+
+#endif /* _ASM_SIGCONTEXT_H */
diff --git a/include/asm-mn10300/siginfo.h b/include/asm-mn10300/siginfo.h
new file mode 100644
index 000000000000..0815d29d82e5
--- /dev/null
+++ b/include/asm-mn10300/siginfo.h
@@ -0,0 +1 @@
+#include <asm-generic/siginfo.h>
diff --git a/include/asm-mn10300/signal.h b/include/asm-mn10300/signal.h
new file mode 100644
index 000000000000..e98817cec5f7
--- /dev/null
+++ b/include/asm-mn10300/signal.h
@@ -0,0 +1,171 @@
+/* MN10300 Signal definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SIGNAL_H
+#define _ASM_SIGNAL_H
+
+#include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifdef __KERNEL__
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+#define _NSIG		64
+#define _NSIG_BPW	32
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+typedef unsigned long old_sigset_t;		/* at least 32 bits */
+
+typedef struct {
+	unsigned long	sig[_NSIG_WORDS];
+} sigset_t;
+
+#else
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG		32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	(_NSIG-1)
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001U
+#define SA_NOCLDWAIT	0x00000002U
+#define SA_SIGINFO	0x00000004U
+#define SA_ONSTACK	0x08000000U
+#define SA_RESTART	0x10000000U
+#define SA_NODEFER	0x40000000U
+#define SA_RESETHAND	0x80000000U
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+#define SA_RESTORER	0x04000000
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#include <asm-generic/signal.h>
+
+#ifdef __KERNEL__
+struct old_sigaction {
+	__sighandler_t sa_handler;
+	old_sigset_t sa_mask;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+};
+
+struct sigaction {
+	__sighandler_t sa_handler;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+struct k_sigaction {
+	struct sigaction sa;
+};
+#else
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+	union {
+	  __sighandler_t _sa_handler;
+	  void (*_sa_sigaction)(int, struct siginfo *, void *);
+	} _u;
+	sigset_t sa_mask;
+	unsigned long sa_flags;
+	void (*sa_restorer)(void);
+};
+
+#define sa_handler	_u._sa_handler
+#define sa_sigaction	_u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+	void __user	*ss_sp;
+	int		ss_flags;
+	size_t		ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+#include <asm/sigcontext.h>
+
+
+struct pt_regs;
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_SIGNAL_H */
diff --git a/include/asm-mn10300/smp.h b/include/asm-mn10300/smp.h
new file mode 100644
index 000000000000..4eb8c61b7dab
--- /dev/null
+++ b/include/asm-mn10300/smp.h
@@ -0,0 +1,18 @@
+/* MN10300 SMP support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SMP_H
+#define _ASM_SMP_H
+
+#ifdef CONFIG_SMP
+#error SMP not yet supported for MN10300
+#endif
+
+#endif
diff --git a/include/asm-mn10300/socket.h b/include/asm-mn10300/socket.h
new file mode 100644
index 000000000000..99ca648b94c5
--- /dev/null
+++ b/include/asm-mn10300/socket.h
@@ -0,0 +1,55 @@
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+/* To add :#define SO_REUSEPORT 15 */
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO	20
+#define SO_SNDTIMEO	21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE	25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER        26
+#define SO_DETACH_FILTER        27
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC		31
+#define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-mn10300/sockios.h b/include/asm-mn10300/sockios.h
new file mode 100644
index 000000000000..b03043a1c564
--- /dev/null
+++ b/include/asm-mn10300/sockios.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_SOCKIOS_H
+#define _ASM_SOCKIOS_H
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN 	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+#endif /* _ASM_SOCKIOS_H */
diff --git a/include/asm-mn10300/spinlock.h b/include/asm-mn10300/spinlock.h
new file mode 100644
index 000000000000..4bf9c8b169e0
--- /dev/null
+++ b/include/asm-mn10300/spinlock.h
@@ -0,0 +1,16 @@
+/* MN10300 spinlock support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SPINLOCK_H
+#define _ASM_SPINLOCK_H
+
+#error SMP spinlocks not implemented for MN10300
+
+#endif /* _ASM_SPINLOCK_H */
diff --git a/include/asm-mn10300/stat.h b/include/asm-mn10300/stat.h
new file mode 100644
index 000000000000..63ff8371cf2c
--- /dev/null
+++ b/include/asm-mn10300/stat.h
@@ -0,0 +1,78 @@
+#ifndef _ASM_STAT_H
+#define _ASM_STAT_H
+
+struct __old_kernel_stat {
+	unsigned short st_dev;
+	unsigned short st_ino;
+	unsigned short st_mode;
+	unsigned short st_nlink;
+	unsigned short st_uid;
+	unsigned short st_gid;
+	unsigned short st_rdev;
+	unsigned long  st_size;
+	unsigned long  st_atime;
+	unsigned long  st_mtime;
+	unsigned long  st_ctime;
+};
+
+struct stat {
+	unsigned long  st_dev;
+	unsigned long  st_ino;
+	unsigned short st_mode;
+	unsigned short st_nlink;
+	unsigned short st_uid;
+	unsigned short st_gid;
+	unsigned long  st_rdev;
+	unsigned long  st_size;
+	unsigned long  st_blksize;
+	unsigned long  st_blocks;
+	unsigned long  st_atime;
+	unsigned long  st_atime_nsec;
+	unsigned long  st_mtime;
+	unsigned long  st_mtime_nsec;
+	unsigned long  st_ctime;
+	unsigned long  st_ctime_nsec;
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+
+/* This matches struct stat64 in glibc2.1, hence the absolutely
+ * insane amounts of padding around dev_t's.
+ */
+struct stat64 {
+	unsigned long long	st_dev;
+	unsigned char	__pad0[4];
+
+#define STAT64_HAS_BROKEN_ST_INO	1
+	unsigned long	__st_ino;
+
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+
+	unsigned long	st_uid;
+	unsigned long	st_gid;
+
+	unsigned long long	st_rdev;
+	unsigned char	__pad3[4];
+
+	long long	st_size;
+	unsigned long	st_blksize;
+
+	unsigned long	st_blocks;	/* Number 512-byte blocks allocated. */
+	unsigned long	__pad4;		/* future possible st_blocks high bits */
+
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+
+	unsigned long	st_mtime;
+	unsigned int	st_mtime_nsec;
+
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+
+	unsigned long long	st_ino;
+};
+
+#define STAT_HAVE_NSEC 1
+
+#endif /* _ASM_STAT_H */
diff --git a/include/asm-mn10300/statfs.h b/include/asm-mn10300/statfs.h
new file mode 100644
index 000000000000..0b91fe198c20
--- /dev/null
+++ b/include/asm-mn10300/statfs.h
@@ -0,0 +1 @@
+#include <asm-generic/statfs.h>
diff --git a/include/asm-mn10300/string.h b/include/asm-mn10300/string.h
new file mode 100644
index 000000000000..47dbd4346c32
--- /dev/null
+++ b/include/asm-mn10300/string.h
@@ -0,0 +1,32 @@
+/* MN10300 Optimised string functions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Modified by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_STRING_H
+#define _ASM_STRING_H
+
+#define __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMMOVE
+
+extern void *memset(void *dest, int ch, size_t count);
+extern void *memcpy(void *dest, const void *src, size_t count);
+extern void *memmove(void *dest, const void *src, size_t count);
+
+
+extern void __struct_cpy_bug(void);
+#define struct_cpy(x, y)			\
+({                                              \
+	if (sizeof(*(x)) != sizeof(*(y)))       \
+		__struct_cpy_bug;               \
+	memcpy(x, y, sizeof(*(x)));             \
+})
+
+#endif /* _ASM_STRING_H */
diff --git a/include/asm-mn10300/system.h b/include/asm-mn10300/system.h
new file mode 100644
index 000000000000..8214fb7e7fe4
--- /dev/null
+++ b/include/asm-mn10300/system.h
@@ -0,0 +1,237 @@
+/* MN10300 System definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_SYSTEM_H
+#define _ASM_SYSTEM_H
+
+#include <asm/cpu-regs.h>
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/kernel.h>
+
+struct task_struct;
+struct thread_struct;
+
+extern asmlinkage
+struct task_struct *__switch_to(struct thread_struct *prev,
+				struct thread_struct *next,
+				struct task_struct *prev_task);
+
+/* context switching is now performed out-of-line in switch_to.S */
+#define switch_to(prev, next, last)					\
+do {									\
+	current->thread.wchan = (u_long) __builtin_return_address(0);	\
+	(last) = __switch_to(&(prev)->thread, &(next)->thread, (prev));	\
+	mb();								\
+	current->thread.wchan = 0;					\
+} while (0)
+
+#define arch_align_stack(x) (x)
+
+#define nop() asm volatile ("nop")
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+
+#define mb()	asm volatile ("": : :"memory")
+#define rmb()	mb()
+#define wmb()	asm volatile ("": : :"memory")
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	rmb()
+#define smp_wmb()	wmb()
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#endif
+
+#define set_mb(var, value)  do { var = value;  mb(); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
+#define read_barrier_depends()		do {} while (0)
+#define smp_read_barrier_depends()	do {} while (0)
+
+/*****************************************************************************/
+/*
+ * interrupt control
+ * - "disabled": run in IM1/2
+ *   - level 0 - GDB stub
+ *   - level 1 - virtual serial DMA (if present)
+ *   - level 5 - normal interrupt priority
+ *   - level 6 - timer interrupt
+ * - "enabled":  run in IM7
+ */
+#ifdef CONFIG_MN10300_TTYSM
+#define MN10300_CLI_LEVEL	EPSW_IM_2
+#else
+#define MN10300_CLI_LEVEL	EPSW_IM_1
+#endif
+
+#define local_save_flags(x)			\
+do {						\
+	typecheck(unsigned long, x);		\
+	asm volatile(				\
+		"	mov epsw,%0	\n"	\
+		: "=d"(x)			\
+		);				\
+} while (0)
+
+#define local_irq_disable()						\
+do {									\
+	asm volatile(							\
+		"	and %0,epsw	\n"				\
+		"	or %1,epsw	\n"				\
+		"	nop		\n"				\
+		"	nop		\n"				\
+		"	nop		\n"				\
+		:							\
+		: "i"(~EPSW_IM), "i"(EPSW_IE | MN10300_CLI_LEVEL)	\
+		);							\
+} while (0)
+
+#define local_irq_save(x)			\
+do {						\
+	local_save_flags(x);			\
+	local_irq_disable();			\
+} while (0)
+
+/*
+ * we make sure local_irq_enable() doesn't cause priority inversion
+ */
+#ifndef __ASSEMBLY__
+
+extern unsigned long __mn10300_irq_enabled_epsw;
+
+#endif
+
+#define local_irq_enable()						\
+do {									\
+	unsigned long tmp;						\
+									\
+	asm volatile(							\
+		"	mov	epsw,%0		\n"			\
+		"	and	%1,%0		\n"			\
+		"	or	%2,%0		\n"			\
+		"	mov	%0,epsw		\n"			\
+		: "=&d"(tmp)						\
+		: "i"(~EPSW_IM), "r"(__mn10300_irq_enabled_epsw)	\
+		);							\
+} while (0)
+
+#define local_irq_restore(x)			\
+do {						\
+	typecheck(unsigned long, x);		\
+	asm volatile(				\
+		"	mov %0,epsw	\n"	\
+		"	nop		\n"	\
+		"	nop		\n"	\
+		"	nop		\n"	\
+		:				\
+		: "d"(x)			\
+		: "memory", "cc"		\
+		);				\
+} while (0)
+
+#define irqs_disabled()				\
+({						\
+	unsigned long flags;			\
+	local_save_flags(flags);		\
+	(flags & EPSW_IM) <= MN10300_CLI_LEVEL;	\
+})
+
+/* hook to save power by halting the CPU
+ * - called from the idle loop
+ * - must reenable interrupts (which takes three instruction cycles to complete)
+ */
+#define safe_halt()							\
+do {									\
+	asm volatile("	or	%0,epsw	\n"				\
+		     "	nop		\n"				\
+		     "	nop		\n"				\
+		     "	bset	%2,(%1)	\n"				\
+		     :							\
+		     : "i"(EPSW_IE|EPSW_IM), "n"(&CPUM), "i"(CPUM_SLEEP)\
+		     : "cc"						\
+		     );							\
+} while (0)
+
+#define STI	or EPSW_IE|EPSW_IM,epsw
+#define CLI	and ~EPSW_IM,epsw; or EPSW_IE|MN10300_CLI_LEVEL,epsw; nop; nop; nop
+
+/*****************************************************************************/
+/*
+ * MN10300 doesn't actually have an exchange instruction
+ */
+#ifndef __ASSEMBLY__
+
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+static inline
+unsigned long __xchg(volatile unsigned long *m, unsigned long val)
+{
+	unsigned long retval;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	retval = *m;
+	*m = val;
+	local_irq_restore(flags);
+	return retval;
+}
+
+#define xchg(ptr, v)						\
+	((__typeof__(*(ptr))) __xchg((unsigned long *)(ptr),	\
+				     (unsigned long)(v)))
+
+static inline unsigned long __cmpxchg(volatile unsigned long *m,
+				      unsigned long old, unsigned long new)
+{
+	unsigned long retval;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	retval = *m;
+	if (retval == old)
+		*m = new;
+	local_irq_restore(flags);
+	return retval;
+}
+
+#define cmpxchg(ptr, o, n)					\
+	((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \
+					(unsigned long)(o),	\
+					(unsigned long)(n)))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_SYSTEM_H */
diff --git a/include/asm-mn10300/termbits.h b/include/asm-mn10300/termbits.h
new file mode 100644
index 000000000000..eb2b0dc1f696
--- /dev/null
+++ b/include/asm-mn10300/termbits.h
@@ -0,0 +1,200 @@
+#ifndef _ASM_TERMBITS_H
+#define _ASM_TERMBITS_H
+
+#include <linux/posix_types.h>
+
+typedef unsigned char	cc_t;
+typedef unsigned int	speed_t;
+typedef unsigned int	tcflag_t;
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+};
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+
+/* c_iflag bits */
+#define IGNBRK	0000001
+#define BRKINT	0000002
+#define IGNPAR	0000004
+#define PARMRK	0000010
+#define INPCK	0000020
+#define ISTRIP	0000040
+#define INLCR	0000100
+#define IGNCR	0000200
+#define ICRNL	0000400
+#define IUCLC	0001000
+#define IXON	0002000
+#define IXANY	0004000
+#define IXOFF	0010000
+#define IMAXBEL	0020000
+#define IUTF8	0040000
+
+/* c_oflag bits */
+#define OPOST	0000001
+#define OLCUC	0000002
+#define ONLCR	0000004
+#define OCRNL	0000010
+#define ONOCR	0000020
+#define ONLRET	0000040
+#define OFILL	0000100
+#define OFDEL	0000200
+#define NLDLY	0000400
+#define   NL0	0000000
+#define   NL1	0000400
+#define CRDLY	0003000
+#define   CR0	0000000
+#define   CR1	0001000
+#define   CR2	0002000
+#define   CR3	0003000
+#define TABDLY	0014000
+#define   TAB0	0000000
+#define   TAB1	0004000
+#define   TAB2	0010000
+#define   TAB3	0014000
+#define   XTABS	0014000
+#define BSDLY	0020000
+#define   BS0	0000000
+#define   BS1	0020000
+#define VTDLY	0040000
+#define   VT0	0000000
+#define   VT1	0040000
+#define FFDLY	0100000
+#define   FF0	0000000
+#define   FF1	0100000
+
+/* c_cflag bit meaning */
+#define CBAUD	0010017
+#define  B0	0000000		/* hang up */
+#define  B50	0000001
+#define  B75	0000002
+#define  B110	0000003
+#define  B134	0000004
+#define  B150	0000005
+#define  B200	0000006
+#define  B300	0000007
+#define  B600	0000010
+#define  B1200	0000011
+#define  B1800	0000012
+#define  B2400	0000013
+#define  B4800	0000014
+#define  B9600	0000015
+#define  B19200	0000016
+#define  B38400	0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE	0000060
+#define   CS5	0000000
+#define   CS6	0000020
+#define   CS7	0000040
+#define   CS8	0000060
+#define CSTOPB	0000100
+#define CREAD	0000200
+#define PARENB	0000400
+#define PARODD	0001000
+#define HUPCL	0002000
+#define CLOCAL	0004000
+#define CBAUDEX 0010000
+#define   BOTHER  0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD	  002003600000	/* input baud rate (not used) */
+#define CTVB	  004000000000		/* VisioBraille Terminal flow control */
+#define CMSPAR	  010000000000		/* mark or space (stick) parity */
+#define CRTSCTS	  020000000000		/* flow control */
+
+#define IBSHIFT	  16		/* Shift from CBAUD to CIBAUD */
+
+/* c_lflag bits */
+#define ISIG	0000001
+#define ICANON	0000002
+#define XCASE	0000004
+#define ECHO	0000010
+#define ECHOE	0000020
+#define ECHOK	0000040
+#define ECHONL	0000100
+#define NOFLSH	0000200
+#define TOSTOP	0000400
+#define ECHOCTL	0001000
+#define ECHOPRT	0002000
+#define ECHOKE	0004000
+#define FLUSHO	0010000
+#define PENDIN	0040000
+#define IEXTEN	0100000
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif /* _ASM_TERMBITS_H */
diff --git a/include/asm-mn10300/termios.h b/include/asm-mn10300/termios.h
new file mode 100644
index 000000000000..dd7cf617e118
--- /dev/null
+++ b/include/asm-mn10300/termios.h
@@ -0,0 +1,92 @@
+#ifndef _ASM_TERMIOS_H
+#define _ASM_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+#ifdef __KERNEL__
+/*	intr=^C		quit=^|		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+#endif
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+#define TIOCM_MODEM_BITS       TIOCM_OUT2      /* IRDA support */
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
+	unsigned short __tmp; \
+	get_user(__tmp, &(termio)->x); \
+	*(unsigned short *) &(termios)->x = __tmp; \
+}
+
+#define user_termio_to_kernel_termios(termios, termio) \
+({ \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \
+	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \
+	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
+})
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+#define kernel_termios_to_user_termio(termio, termios) \
+({ \
+	put_user((termios)->c_iflag, &(termio)->c_iflag); \
+	put_user((termios)->c_oflag, &(termio)->c_oflag); \
+	put_user((termios)->c_cflag, &(termio)->c_cflag); \
+	put_user((termios)->c_lflag, &(termio)->c_lflag); \
+	put_user((termios)->c_line,  &(termio)->c_line); \
+	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
+})
+
+#define user_termios_to_kernel_termios(k, u) \
+	copy_from_user(k, u, sizeof(struct termios2))
+#define kernel_termios_to_user_termios(u, k) \
+	copy_to_user(u, k, sizeof(struct termios2))
+#define user_termios_to_kernel_termios_1(k, u) \
+	copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios_1(u, k) \
+	copy_to_user(u, k, sizeof(struct termios))
+
+#endif	/* _ASM_TERMIOS_H */
diff --git a/include/asm-mn10300/thread_info.h b/include/asm-mn10300/thread_info.h
new file mode 100644
index 000000000000..e397e7192785
--- /dev/null
+++ b/include/asm-mn10300/thread_info.h
@@ -0,0 +1,168 @@
+/* MN10300 Low-level thread information
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#endif
+
+#define PREEMPT_ACTIVE		0x10000000
+
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SIZE		(4096)
+#else
+#define THREAD_SIZE		(8192)
+#endif
+
+#define STACK_WARN		(THREAD_SIZE / 8)
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants
+ *   must also be changed
+ */
+#ifndef __ASSEMBLY__
+
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain	*exec_domain;	/* execution domain */
+	unsigned long		flags;		/* low level flags */
+	__u32			cpu;		/* current CPU */
+	__s32			preempt_count;	/* 0 => preemptable, <0 => BUG */
+
+	mm_segment_t		addr_limit;	/* thread address space:
+						   0-0xBFFFFFFF for user-thead
+						   0-0xFFFFFFFF for kernel-thread
+						*/
+	struct restart_block    restart_block;
+
+	__u8			supervisor_stack[0];
+};
+
+#else /* !__ASSEMBLY__ */
+
+#ifndef __ASM_OFFSETS_H__
+#include <asm/asm-offsets.h>
+#endif
+
+#endif
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ *
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+#ifndef __ASSEMBLY__
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= 1,			\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+#define init_uregs							\
+	((struct pt_regs *)						\
+	 ((unsigned long) init_stack + THREAD_SIZE - sizeof(struct pt_regs)))
+
+extern struct thread_info *__current_ti;
+
+/* how to get the thread information struct from C */
+static inline __attribute__((const))
+struct thread_info *current_thread_info(void)
+{
+	struct thread_info *ti;
+	asm("mov sp,%0\n"
+	    "and %1,%0\n"
+	    : "=d" (ti)
+	    : "i" (~(THREAD_SIZE - 1))
+	    : "cc");
+	return ti;
+}
+
+/* how to get the current stack pointer from C */
+static inline unsigned long current_stack_pointer(void)
+{
+	unsigned long sp;
+	asm("mov sp,%0; ":"=r" (sp));
+	return sp;
+}
+
+/* thread information allocation */
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
+#else
+#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#endif
+
+#define free_thread_info(ti)	kfree((ti))
+#define get_thread_info(ti)	get_task_struct((ti)->task)
+#define put_thread_info(ti)	put_task_struct((ti)->task)
+
+#else /* !__ASSEMBLY__ */
+
+#ifndef __VMLINUX_LDS__
+/* how to get the thread information struct from ASM */
+.macro GET_THREAD_INFO reg
+	mov	sp,\reg
+	and	-THREAD_SIZE,\reg
+.endm
+#endif
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ *   access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* resumption notification requested */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
+#define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
+#define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE		17	/* OOM killer killed process */
+#define TIF_FREEZE		18	/* freezing for suspend */
+
+#define _TIF_SYSCALL_TRACE	+(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	+(1 << TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		+(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	+(1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP		+(1 << TIF_SINGLESTEP)
+#define _TIF_RESTORE_SIGMASK	+(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_POLLING_NRFLAG	+(1 << TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		+(1 << TIF_FREEZE)
+
+#define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
+#define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/include/asm-mn10300/timer-regs.h b/include/asm-mn10300/timer-regs.h
new file mode 100644
index 000000000000..1d883b7f94ab
--- /dev/null
+++ b/include/asm-mn10300/timer-regs.h
@@ -0,0 +1,293 @@
+/* AM33v2 on-board timer module registers
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_TIMER_REGS_H
+#define _ASM_TIMER_REGS_H
+
+#include <asm/cpu-regs.h>
+#include <asm/intctl-regs.h>
+
+#ifdef __KERNEL__
+
+/* timer prescalar control */
+#define	TMPSCNT			__SYSREG(0xd4003071, u8) /* timer prescaler control */
+#define	TMPSCNT_ENABLE		0x80	/* timer prescaler enable */
+#define	TMPSCNT_DISABLE		0x00	/* timer prescaler disable */
+
+/* 8 bit timers */
+#define	TM0MD			__SYSREG(0xd4003000, u8) /* timer 0 mode register */
+#define	TM0MD_SRC		0x07	/* timer source */
+#define	TM0MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM0MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM0MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM0MD_SRC_TM2IO		0x03	/* - TM2IO pin input */
+#define	TM0MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM0MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM0MD_SRC_TM0IO		0x07	/* - TM0IO pin input */
+#define	TM0MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM0MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM1MD			__SYSREG(0xd4003001, u8) /* timer 1 mode register */
+#define	TM1MD_SRC		0x07	/* timer source */
+#define	TM1MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM1MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM1MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM1MD_SRC_TM0CASCADE	0x03	/* - cascade with timer 0 */
+#define	TM1MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM1MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM1MD_SRC_TM1IO		0x07	/* - TM1IO pin input */
+#define	TM1MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM1MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM2MD			__SYSREG(0xd4003002, u8) /* timer 2 mode register */
+#define	TM2MD_SRC		0x07	/* timer source */
+#define	TM2MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM2MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM2MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM2MD_SRC_TM1CASCADE	0x03	/* - cascade with timer 1 */
+#define	TM2MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM2MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM2MD_SRC_TM2IO		0x07	/* - TM2IO pin input */
+#define	TM2MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM2MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM3MD			__SYSREG(0xd4003003, u8) /* timer 3 mode register */
+#define	TM3MD_SRC		0x07	/* timer source */
+#define	TM3MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM3MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM3MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM3MD_SRC_TM1CASCADE	0x03	/* - cascade with timer 2 */
+#define	TM3MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM3MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM3MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM3MD_SRC_TM3IO		0x07	/* - TM3IO pin input */
+#define	TM3MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM3MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM01MD			__SYSREG(0xd4003000, u16)  /* timer 0:1 mode register */
+
+#define	TM0BR			__SYSREG(0xd4003010, u8)   /* timer 0 base register */
+#define	TM1BR			__SYSREG(0xd4003011, u8)   /* timer 1 base register */
+#define	TM2BR			__SYSREG(0xd4003012, u8)   /* timer 2 base register */
+#define	TM3BR			__SYSREG(0xd4003013, u8)   /* timer 3 base register */
+#define	TM01BR			__SYSREG(0xd4003010, u16)  /* timer 0:1 base register */
+
+#define	TM0BC			__SYSREGC(0xd4003020, u8)  /* timer 0 binary counter */
+#define	TM1BC			__SYSREGC(0xd4003021, u8)  /* timer 1 binary counter */
+#define	TM2BC			__SYSREGC(0xd4003022, u8)  /* timer 2 binary counter */
+#define	TM3BC			__SYSREGC(0xd4003023, u8)  /* timer 3 binary counter */
+#define	TM01BC			__SYSREGC(0xd4003020, u16) /* timer 0:1 binary counter */
+
+#define TM0IRQ			2	/* timer 0 IRQ */
+#define TM1IRQ			3	/* timer 1 IRQ */
+#define TM2IRQ			4	/* timer 2 IRQ */
+#define TM3IRQ			5	/* timer 3 IRQ */
+
+#define	TM0ICR			GxICR(TM0IRQ)	/* timer 0 uflow intr ctrl reg */
+#define	TM1ICR			GxICR(TM1IRQ)	/* timer 1 uflow intr ctrl reg */
+#define	TM2ICR			GxICR(TM2IRQ)	/* timer 2 uflow intr ctrl reg */
+#define	TM3ICR			GxICR(TM3IRQ)	/* timer 3 uflow intr ctrl reg */
+
+/* 16-bit timers 4,5 & 7-11 */
+#define	TM4MD			__SYSREG(0xd4003080, u8)   /* timer 4 mode register */
+#define	TM4MD_SRC		0x07	/* timer source */
+#define	TM4MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM4MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM4MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM4MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM4MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM4MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM4MD_SRC_TM4IO		0x07	/* - TM4IO pin input */
+#define	TM4MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM4MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM5MD			__SYSREG(0xd4003082, u8)   /* timer 5 mode register */
+#define	TM5MD_SRC		0x07	/* timer source */
+#define	TM5MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM5MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM5MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM5MD_SRC_TM4CASCADE	0x03	/* - cascade with timer 4 */
+#define	TM5MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM5MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM5MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM5MD_SRC_TM5IO		0x07	/* - TM5IO pin input */
+#define	TM5MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM5MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM7MD			__SYSREG(0xd4003086, u8)   /* timer 7 mode register */
+#define	TM7MD_SRC		0x07	/* timer source */
+#define	TM7MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM7MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM7MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM7MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM7MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM7MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM7MD_SRC_TM7IO		0x07	/* - TM7IO pin input */
+#define	TM7MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM7MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM8MD			__SYSREG(0xd4003088, u8)   /* timer 8 mode register */
+#define	TM8MD_SRC		0x07	/* timer source */
+#define	TM8MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM8MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM8MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM8MD_SRC_TM7CASCADE	0x03	/* - cascade with timer 7 */
+#define	TM8MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM8MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM8MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM8MD_SRC_TM8IO		0x07	/* - TM8IO pin input */
+#define	TM8MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM8MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM9MD			__SYSREG(0xd400308a, u8)   /* timer 9 mode register */
+#define	TM9MD_SRC		0x07	/* timer source */
+#define	TM9MD_SRC_IOCLK		0x00	/* - IOCLK */
+#define	TM9MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM9MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM9MD_SRC_TM8CASCADE	0x03	/* - cascade with timer 8 */
+#define	TM9MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM9MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM9MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM9MD_SRC_TM9IO		0x07	/* - TM9IO pin input */
+#define	TM9MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM9MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM10MD			__SYSREG(0xd400308c, u8)   /* timer 10 mode register */
+#define	TM10MD_SRC		0x07	/* timer source */
+#define	TM10MD_SRC_IOCLK	0x00	/* - IOCLK */
+#define	TM10MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM10MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM10MD_SRC_TM9CASCADE	0x03	/* - cascade with timer 9 */
+#define	TM10MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM10MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM10MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM10MD_SRC_TM10IO	0x07	/* - TM10IO pin input */
+#define	TM10MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM10MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM11MD			__SYSREG(0xd400308e, u8)   /* timer 11 mode register */
+#define	TM11MD_SRC		0x07	/* timer source */
+#define	TM11MD_SRC_IOCLK	0x00	/* - IOCLK */
+#define	TM11MD_SRC_IOCLK_8	0x01	/* - 1/8 IOCLK */
+#define	TM11MD_SRC_IOCLK_32	0x02	/* - 1/32 IOCLK */
+#define	TM11MD_SRC_TM7CASCADE	0x03	/* - cascade with timer 7 */
+#define	TM11MD_SRC_TM0UFLOW	0x04	/* - timer 0 underflow */
+#define	TM11MD_SRC_TM1UFLOW	0x05	/* - timer 1 underflow */
+#define	TM11MD_SRC_TM2UFLOW	0x06	/* - timer 2 underflow */
+#define	TM11MD_SRC_TM11IO	0x07	/* - TM11IO pin input */
+#define	TM11MD_INIT_COUNTER	0x40	/* initialize TMnBC = TMnBR */
+#define	TM11MD_COUNT_ENABLE	0x80	/* timer count enable */
+
+#define	TM4BR			__SYSREG(0xd4003090, u16)  /* timer 4 base register */
+#define	TM5BR			__SYSREG(0xd4003092, u16)  /* timer 5 base register */
+#define	TM7BR			__SYSREG(0xd4003096, u16)  /* timer 7 base register */
+#define	TM8BR			__SYSREG(0xd4003098, u16)  /* timer 8 base register */
+#define	TM9BR			__SYSREG(0xd400309a, u16)  /* timer 9 base register */
+#define	TM10BR			__SYSREG(0xd400309c, u16)  /* timer 10 base register */
+#define	TM11BR			__SYSREG(0xd400309e, u16)  /* timer 11 base register */
+#define	TM45BR			__SYSREG(0xd4003090, u32)  /* timer 4:5 base register */
+
+#define	TM4BC			__SYSREG(0xd40030a0, u16)  /* timer 4 binary counter */
+#define	TM5BC			__SYSREG(0xd40030a2, u16)  /* timer 5 binary counter */
+#define	TM45BC			__SYSREG(0xd40030a0, u32)  /* timer 4:5 binary counter */
+
+#define	TM7BC			__SYSREG(0xd40030a6, u16)  /* timer 7 binary counter */
+#define	TM8BC			__SYSREG(0xd40030a8, u16)  /* timer 8 binary counter */
+#define	TM9BC			__SYSREG(0xd40030aa, u16)  /* timer 9 binary counter */
+#define	TM10BC			__SYSREG(0xd40030ac, u16)  /* timer 10 binary counter */
+#define	TM11BC			__SYSREG(0xd40030ae, u16)  /* timer 11 binary counter */
+
+#define TM4IRQ			6	/* timer 4 IRQ */
+#define TM5IRQ			7	/* timer 5 IRQ */
+#define TM7IRQ			11	/* timer 7 IRQ */
+#define TM8IRQ			12	/* timer 8 IRQ */
+#define TM9IRQ			13	/* timer 9 IRQ */
+#define TM10IRQ			14	/* timer 10 IRQ */
+#define TM11IRQ			15	/* timer 11 IRQ */
+
+#define	TM4ICR			GxICR(TM4IRQ)	/* timer 4 uflow intr ctrl reg */
+#define	TM5ICR			GxICR(TM5IRQ)	/* timer 5 uflow intr ctrl reg */
+#define	TM7ICR			GxICR(TM7IRQ)	/* timer 7 uflow intr ctrl reg */
+#define	TM8ICR			GxICR(TM8IRQ)	/* timer 8 uflow intr ctrl reg */
+#define	TM9ICR			GxICR(TM9IRQ)	/* timer 9 uflow intr ctrl reg */
+#define	TM10ICR			GxICR(TM10IRQ)	/* timer 10 uflow intr ctrl reg */
+#define	TM11ICR			GxICR(TM11IRQ)	/* timer 11 uflow intr ctrl reg */
+
+/* 16-bit timer 6 */
+#define	TM6MD			__SYSREG(0xd4003084, u16)  /* timer6 mode register */
+#define	TM6MD_SRC		0x0007	/* timer source */
+#define	TM6MD_SRC_IOCLK		0x0000	/* - IOCLK */
+#define	TM6MD_SRC_IOCLK_8	0x0001	/* - 1/8 IOCLK */
+#define	TM6MD_SRC_IOCLK_32	0x0002	/* - 1/32 IOCLK */
+#define	TM6MD_SRC_TM0UFLOW	0x0004	/* - timer 0 underflow */
+#define	TM6MD_SRC_TM1UFLOW	0x0005	/* - timer 1 underflow */
+#define	TM6MD_SRC_TM6IOB_BOTH	0x0006	/* - TM6IOB pin input (both edges) */
+#define	TM6MD_SRC_TM6IOB_SINGLE	0x0007	/* - TM6IOB pin input (single edge) */
+#define	TM6MD_CLR_ENABLE	0x0010	/* clear count enable */
+#define	TM6MD_ONESHOT_ENABLE	0x0040	/* oneshot count */
+#define	TM6MD_TRIG_ENABLE	0x0080	/* TM6IOB pin trigger enable */
+#define TM6MD_PWM		0x3800	/* PWM output mode */
+#define TM6MD_PWM_DIS		0x0000	/* - disabled */
+#define	TM6MD_PWM_10BIT		0x1000	/* - 10 bits mode */
+#define	TM6MD_PWM_11BIT		0x1800	/* - 11 bits mode */
+#define	TM6MD_PWM_12BIT		0x3000	/* - 12 bits mode */
+#define	TM6MD_PWM_14BIT		0x3800	/* - 14 bits mode */
+#define	TM6MD_INIT_COUNTER	0x4000	/* initialize TMnBC to zero */
+#define	TM6MD_COUNT_ENABLE	0x8000	/* timer count enable */
+
+#define	TM6MDA			__SYSREG(0xd40030b4, u8)   /* timer6 cmp/cap A mode reg */
+#define TM6MDA_OUT		0x07	/* output select */
+#define	TM6MDA_OUT_SETA_RESETB	0x00	/* - set at match A, reset at match B */
+#define	TM6MDA_OUT_SETA_RESETOV	0x01	/* - set at match A, reset at overflow */
+#define	TM6MDA_OUT_SETA		0x02	/* - set at match A */
+#define	TM6MDA_OUT_RESETA	0x03	/* - reset at match A */
+#define	TM6MDA_OUT_TOGGLE	0x04	/* - toggle on match A */
+#define TM6MDA_MODE		0xc0	/* compare A register mode */
+#define	TM6MDA_MODE_CMP_SINGLE	0x00	/* - compare, single buffer mode */
+#define	TM6MDA_MODE_CMP_DOUBLE	0x40	/* - compare, double buffer mode */
+#define	TM6MDA_MODE_CAP_S_EDGE	0x80	/* - capture, single edge mode */
+#define	TM6MDA_MODE_CAP_D_EDGE	0xc0	/* - capture, double edge mode */
+#define TM6MDA_EDGE		0x20	/* compare A edge select */
+#define	TM6MDA_EDGE_FALLING	0x00	/* capture on falling edge */
+#define	TM6MDA_EDGE_RISING	0x20	/* capture on rising edge */
+#define	TM6MDA_CAPTURE_ENABLE	0x10	/* capture enable */
+
+#define	TM6MDB			__SYSREG(0xd40030b5, u8)   /* timer6 cmp/cap B mode reg */
+#define TM6MDB_OUT		0x07	/* output select */
+#define	TM6MDB_OUT_SETB_RESETA	0x00	/* - set at match B, reset at match A */
+#define	TM6MDB_OUT_SETB_RESETOV	0x01	/* - set at match B */
+#define	TM6MDB_OUT_RESETB	0x03	/* - reset at match B */
+#define	TM6MDB_OUT_TOGGLE	0x04	/* - toggle on match B */
+#define TM6MDB_MODE		0xc0	/* compare B register mode */
+#define	TM6MDB_MODE_CMP_SINGLE	0x00	/* - compare, single buffer mode */
+#define	TM6MDB_MODE_CMP_DOUBLE	0x40	/* - compare, double buffer mode */
+#define	TM6MDB_MODE_CAP_S_EDGE	0x80	/* - capture, single edge mode */
+#define	TM6MDB_MODE_CAP_D_EDGE	0xc0	/* - capture, double edge mode */
+#define TM6MDB_EDGE		0x20	/* compare B edge select */
+#define	TM6MDB_EDGE_FALLING	0x00	/* capture on falling edge */
+#define	TM6MDB_EDGE_RISING	0x20	/* capture on rising edge */
+#define	TM6MDB_CAPTURE_ENABLE	0x10	/* capture enable */
+
+#define	TM6CA			__SYSREG(0xd40030c4, u16)   /* timer6 cmp/capture reg A */
+#define	TM6CB			__SYSREG(0xd40030d4, u16)   /* timer6 cmp/capture reg B */
+#define	TM6BC			__SYSREG(0xd40030a4, u16)   /* timer6 binary counter */
+
+#define TM6IRQ			6	/* timer 6 IRQ */
+#define TM6AIRQ			9	/* timer 6A IRQ */
+#define TM6BIRQ			10	/* timer 6B IRQ */
+
+#define	TM6ICR			GxICR(TM6IRQ)	/* timer 6 uflow intr ctrl reg */
+#define	TM6AICR			GxICR(TM6AIRQ)	/* timer 6A intr control reg */
+#define	TM6BICR			GxICR(TM6BIRQ)	/* timer 6B intr control reg */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TIMER_REGS_H */
diff --git a/include/asm-mn10300/timex.h b/include/asm-mn10300/timex.h
new file mode 100644
index 000000000000..3944277dab67
--- /dev/null
+++ b/include/asm-mn10300/timex.h
@@ -0,0 +1,33 @@
+/* MN10300 Architecture time management specifications
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_TIMEX_H
+#define _ASM_TIMEX_H
+
+#include <asm/hardirq.h>
+#include <asm/unit/timex.h>
+
+#define TICK_SIZE (tick_nsec / 1000)
+
+#define CLOCK_TICK_RATE 1193180 /* Underlying HZ - this should probably be set
+				 * to something appropriate, but what? */
+
+extern cycles_t cacheflush_time;
+
+#ifdef __KERNEL__
+
+static inline cycles_t get_cycles(void)
+{
+	return read_timestamp_counter();
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TIMEX_H */
diff --git a/include/asm-mn10300/tlb.h b/include/asm-mn10300/tlb.h
new file mode 100644
index 000000000000..65d232b96613
--- /dev/null
+++ b/include/asm-mn10300/tlb.h
@@ -0,0 +1,34 @@
+/* MN10300 TLB definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_TLB_H
+#define _ASM_TLB_H
+
+#include <asm/tlbflush.h>
+
+extern void check_pgt_cache(void);
+
+/*
+ * we don't need any special per-pte or per-vma handling...
+ */
+#define tlb_start_vma(tlb, vma)				do { } while (0)
+#define tlb_end_vma(tlb, vma)				do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, ptep, address)	do { } while (0)
+
+/*
+ * .. because we flush the whole mm when it fills up
+ */
+#define tlb_flush(tlb)	flush_tlb_mm((tlb)->mm)
+
+/* for now, just use the generic stuff */
+#include <asm-generic/tlb.h>
+
+#endif /* _ASM_TLB_H */
diff --git a/include/asm-mn10300/tlbflush.h b/include/asm-mn10300/tlbflush.h
new file mode 100644
index 000000000000..e0239865abcb
--- /dev/null
+++ b/include/asm-mn10300/tlbflush.h
@@ -0,0 +1,80 @@
+/* MN10300 TLB flushing functions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_TLBFLUSH_H
+#define _ASM_TLBFLUSH_H
+
+#include <asm/processor.h>
+
+#define __flush_tlb()						\
+do {								\
+	int w;							\
+	__asm__ __volatile__					\
+		("	mov %1,%0		\n"		\
+		 "	or %2,%0		\n"		\
+		 "	mov %0,%1		\n"		\
+		 : "=d"(w)					\
+		 : "m"(MMUCTR), "i"(MMUCTR_IIV|MMUCTR_DIV)	\
+		 : "memory"					\
+		 );						\
+} while (0)
+
+#define __flush_tlb_all() __flush_tlb()
+#define __flush_tlb_one(addr) __flush_tlb()
+
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ */
+#define flush_tlb_all()				\
+do {						\
+	preempt_disable();			\
+	__flush_tlb_all();			\
+	preempt_enable();			\
+} while (0)
+
+#define flush_tlb_mm(mm)			\
+do {						\
+	preempt_disable();			\
+	__flush_tlb_all();			\
+	preempt_enable();			\
+} while (0)
+
+#define flush_tlb_range(vma, start, end)			\
+do {								\
+	unsigned long __s __attribute__((unused)) = (start);	\
+	unsigned long __e __attribute__((unused)) = (end);	\
+	preempt_disable();					\
+	__flush_tlb_all();					\
+	preempt_enable();					\
+} while (0)
+
+
+#define __flush_tlb_global()			flush_tlb_all()
+#define flush_tlb()				flush_tlb_all()
+#define flush_tlb_kernel_range(start, end)			\
+do {								\
+	unsigned long __s __attribute__((unused)) = (start);	\
+	unsigned long __e __attribute__((unused)) = (end);	\
+	flush_tlb_all();					\
+} while (0)
+
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+
+#define flush_tlb_pgtables(mm, start, end)	do {} while (0)
+
+#endif /* _ASM_TLBFLUSH_H */
diff --git a/include/asm-mn10300/topology.h b/include/asm-mn10300/topology.h
new file mode 100644
index 000000000000..5428f333a02c
--- /dev/null
+++ b/include/asm-mn10300/topology.h
@@ -0,0 +1 @@
+#include <asm-generic/topology.h>
diff --git a/include/asm-mn10300/types.h b/include/asm-mn10300/types.h
new file mode 100644
index 000000000000..d40ea7628bfc
--- /dev/null
+++ b/include/asm-mn10300/types.h
@@ -0,0 +1,67 @@
+/* MN10300 Basic type definitions
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_TYPES_H
+#define _ASM_TYPES_H
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+
+#define BITS_PER_LONG 32
+
+#ifndef __ASSEMBLY__
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+/* Dma addresses are 32-bits wide.  */
+typedef u32 dma_addr_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TYPES_H */
diff --git a/include/asm-mn10300/uaccess.h b/include/asm-mn10300/uaccess.h
new file mode 100644
index 000000000000..46b9b647f3c3
--- /dev/null
+++ b/include/asm-mn10300/uaccess.h
@@ -0,0 +1,490 @@
+/* MN10300 userspace access functions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UACCESS_H
+#define _ASM_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/errno.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
+
+#define KERNEL_XDS	MAKE_MM_SEG(0xBFFFFFFF)
+#define KERNEL_DS	MAKE_MM_SEG(0x9FFFFFFF)
+#define USER_DS		MAKE_MM_SEG(TASK_SIZE)
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current_thread_info()->addr_limit)
+#define set_fs(x)	(current_thread_info()->addr_limit = (x))
+#define __kernel_ds_p() (current_thread_info()->addr_limit.seg == 0x9FFFFFFF)
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+#define __addr_ok(addr) \
+	((unsigned long)(addr) < (current_thread_info()->addr_limit.seg))
+
+/*
+ * check that a range of addresses falls within the current address limit
+ */
+static inline int ___range_ok(unsigned long addr, unsigned int size)
+{
+	int flag = 1, tmp;
+
+	asm("	add	%3,%1	\n"	/* set C-flag if addr + size > 4Gb */
+	    "	bcs	0f	\n"
+	    "	cmp	%4,%1	\n"	/* jump if addr+size>limit (error) */
+	    "	bhi	0f	\n"
+	    "	clr	%0	\n"	/* mark okay */
+	    "0:			\n"
+	    : "=r"(flag), "=&r"(tmp)
+	    : "1"(addr), "ir"(size),
+	      "r"(current_thread_info()->addr_limit.seg), "0"(flag)
+	    : "cc"
+	    );
+
+	return flag;
+}
+
+#define __range_ok(addr, size) ___range_ok((unsigned long)(addr), (u32)(size))
+
+#define access_ok(type, addr, size) (__range_ok((addr), (size)) == 0)
+#define __access_ok(addr, size)     (__range_ok((addr), (size)) == 0)
+
+static inline int verify_area(int type, const void *addr, unsigned long size)
+{
+	return access_ok(type, addr, size) ? 0 : -EFAULT;
+}
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+	unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise.  */
+extern int fixup_exception(struct pt_regs *regs);
+
+#define put_user(x, ptr) __put_user_check((x), (ptr), sizeof(*(ptr)))
+#define get_user(x, ptr) __get_user_check((x), (ptr), sizeof(*(ptr)))
+
+/*
+ * The "__xxx" versions do not do address space checking, useful when
+ * doing multiple accesses to the same area (the user has to do the
+ * checks by hand with "access_ok()")
+ */
+#define __put_user(x, ptr) __put_user_nocheck((x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr) __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
+/*
+ * The "xxx_ret" versions return constant specified in third argument, if
+ * something bad happens. These macros can be optimized for the
+ * case of just returning from the function xxx_ret is used.
+ */
+
+#define put_user_ret(x, ptr, ret) \
+	({ if (put_user((x), (ptr)))	return (ret); })
+#define get_user_ret(x, ptr, ret) \
+	({ if (get_user((x), (ptr)))	return (ret); })
+#define __put_user_ret(x, ptr, ret) \
+	({ if (__put_user((x), (ptr)))	return (ret); })
+#define __get_user_ret(x, ptr, ret) \
+	({ if (__get_user((x), (ptr)))	return (ret); })
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+#define __get_user_nocheck(x, ptr, size)	\
+({						\
+	__typeof(*(ptr)) __gu_val;		\
+	unsigned long __gu_addr;		\
+	int __gu_err;				\
+	__gu_addr = (unsigned long) (ptr);	\
+	switch (size) {				\
+	case 1:  __get_user_asm("bu"); break;	\
+	case 2:  __get_user_asm("hu"); break;	\
+	case 4:  __get_user_asm(""  ); break;	\
+	default: __get_user_unknown(); break;	\
+	}					\
+	x = (__typeof__(*(ptr))) __gu_val;	\
+	__gu_err;				\
+})
+
+#define __get_user_check(x, ptr, size)			\
+({							\
+	__typeof__(*(ptr)) __gu_val;			\
+	unsigned long __gu_addr;			\
+	int __gu_err;					\
+	__gu_addr = (unsigned long) (ptr);		\
+	if (likely(__access_ok(__gu_addr,size))) {	\
+		switch (size) {				\
+		case 1:  __get_user_asm("bu"); break;	\
+		case 2:  __get_user_asm("hu"); break;	\
+		case 4:  __get_user_asm(""  ); break;	\
+		default: __get_user_unknown(); break;	\
+		}					\
+	}						\
+	else {						\
+		__gu_err = -EFAULT;			\
+		__gu_val = 0;				\
+	}						\
+	x = (__typeof__(*(ptr))) __gu_val;		\
+	__gu_err;					\
+})
+
+#define __get_user_asm(INSN)					\
+({								\
+	asm volatile(					\
+		"1:\n"						\
+		"	mov"INSN"	%2,%1\n"		\
+		"	mov		0,%0\n"			\
+		"2:\n"						\
+		"	.section	.fixup,\"ax\"\n"	\
+		"3:\n\t"					\
+		"	mov		%3,%0\n"		\
+		"	jmp		2b\n"			\
+		"	.previous\n"				\
+		"	.section	__ex_table,\"a\"\n"	\
+		"	.balign		4\n"			\
+		"	.long		1b, 3b\n"		\
+		"	.previous"				\
+		: "=&r" (__gu_err), "=&r" (__gu_val)		\
+		: "m" (__m(__gu_addr)), "i" (-EFAULT));		\
+})
+
+extern int __get_user_unknown(void);
+
+#define __put_user_nocheck(x, ptr, size)			\
+({								\
+	union {							\
+		__typeof__(*(ptr)) val;				\
+		u32 bits[2];					\
+	} __pu_val;						\
+	unsigned long __pu_addr;				\
+	int __pu_err;						\
+	__pu_val.val = (x);					\
+	__pu_addr = (unsigned long) (ptr);			\
+	switch (size) {						\
+	case 1:  __put_user_asm("bu"); break;			\
+	case 2:  __put_user_asm("hu"); break;			\
+	case 4:  __put_user_asm(""  ); break;			\
+	case 8:  __put_user_asm8();    break;			\
+	default: __pu_err = __put_user_unknown(); break;	\
+	}							\
+	__pu_err;						\
+})
+
+#define __put_user_check(x, ptr, size)					\
+({									\
+	union {								\
+		__typeof__(*(ptr)) val;					\
+		u32 bits[2];						\
+	} __pu_val;							\
+	unsigned long __pu_addr;					\
+	int __pu_err;							\
+	__pu_val.val = (x);						\
+	__pu_addr = (unsigned long) (ptr);				\
+	if (likely(__access_ok(__pu_addr, size))) {			\
+		switch (size) {						\
+		case 1:  __put_user_asm("bu"); break;			\
+		case 2:  __put_user_asm("hu"); break;			\
+		case 4:  __put_user_asm(""  ); break;			\
+		case 8:  __put_user_asm8();    break;			\
+		default: __pu_err = __put_user_unknown(); break;	\
+		}							\
+	}								\
+	else {								\
+		__pu_err = -EFAULT;					\
+	}								\
+	__pu_err;							\
+})
+
+#define __put_user_asm(INSN)					\
+({								\
+	asm volatile(						\
+		"1:\n"						\
+		"	mov"INSN"	%1,%2\n"		\
+		"	mov		0,%0\n"			\
+		"2:\n"						\
+		"	.section	.fixup,\"ax\"\n"	\
+		"3:\n"						\
+		"	mov		%3,%0\n"		\
+		"	jmp		2b\n"			\
+		"	.previous\n"				\
+		"	.section	__ex_table,\"a\"\n"	\
+		"	.balign		4\n"			\
+		"	.long		1b, 3b\n"		\
+		"	.previous"				\
+		: "=&r" (__pu_err)				\
+		: "r" (__pu_val.val), "m" (__m(__pu_addr)),	\
+		  "i" (-EFAULT)					\
+		);						\
+})
+
+#define __put_user_asm8()						\
+({									\
+	asm volatile(							\
+		"1:	mov		%1,%3		\n"		\
+		"2:	mov		%2,%4		\n"		\
+		"	mov		0,%0		\n"		\
+		"3:					\n"		\
+		"	.section	.fixup,\"ax\"	\n"		\
+		"4:					\n"		\
+		"	mov		%5,%0		\n"		\
+		"	jmp		2b		\n"		\
+		"	.previous			\n"		\
+		"	.section	__ex_table,\"a\"\n"		\
+		"	.balign		4		\n"		\
+		"	.long		1b, 4b		\n"		\
+		"	.long		2b, 4b		\n"		\
+		"	.previous			\n"		\
+		: "=&r" (__pu_err)					\
+		: "r" (__pu_val.bits[0]), "r" (__pu_val.bits[1]),	\
+		  "m" (__m(__pu_addr)), "m" (__m(__pu_addr+4)),		\
+		  "i" (-EFAULT)						\
+		);							\
+})
+
+extern int __put_user_unknown(void);
+
+
+/*
+ * Copy To/From Userspace
+ */
+/* Generic arbitrary sized copy.  */
+#define __copy_user(to, from, size)					\
+do {									\
+	if (size) {							\
+		void *__to = to;					\
+		const void *__from = from;				\
+		int w;							\
+		asm volatile(						\
+			"0:     movbu	(%0),%3;\n"			\
+			"1:     movbu	%3,(%1);\n"			\
+			"	inc	%0;\n"				\
+			"	inc	%1;\n"				\
+			"       add	-1,%2;\n"			\
+			"       bne	0b;\n"				\
+			"2:\n"						\
+			"	.section .fixup,\"ax\"\n"		\
+			"3:	jmp	2b\n"				\
+			"	.previous\n"				\
+			"	.section __ex_table,\"a\"\n"		\
+			"       .balign	4\n"				\
+			"       .long	0b,3b\n"			\
+			"       .long	1b,3b\n"			\
+			"	.previous\n"				\
+			: "=a"(__from), "=a"(__to), "=r"(size), "=&r"(w)\
+			: "0"(__from), "1"(__to), "2"(size)		\
+			: "memory");					\
+	}								\
+} while (0)
+
+#define __copy_user_zeroing(to, from, size)				\
+do {									\
+	if (size) {							\
+		void *__to = to;					\
+		const void *__from = from;				\
+		int w;							\
+		asm volatile(						\
+			"0:     movbu	(%0),%3;\n"			\
+			"1:     movbu	%3,(%1);\n"			\
+			"	inc	%0;\n"				\
+			"	inc	%1;\n"				\
+			"       add	-1,%2;\n"			\
+			"       bne	0b;\n"				\
+			"2:\n"						\
+			"	.section .fixup,\"ax\"\n"		\
+			"3:\n"						\
+			"	mov	%2,%0\n"			\
+			"	clr	%3\n"				\
+			"4:     movbu	%3,(%1);\n"			\
+			"	inc	%1;\n"				\
+			"       add	-1,%2;\n"			\
+			"       bne	4b;\n"				\
+			"	mov	%0,%2\n"			\
+			"	jmp	2b\n"				\
+			"	.previous\n"				\
+			"	.section __ex_table,\"a\"\n"		\
+			"       .balign	4\n"				\
+			"       .long	0b,3b\n"			\
+			"       .long	1b,3b\n"			\
+			"	.previous\n"				\
+			: "=a"(__from), "=a"(__to), "=r"(size), "=&r"(w)\
+			: "0"(__from), "1"(__to), "2"(size)		\
+			: "memory");					\
+	}								\
+} while (0)
+
+/* We let the __ versions of copy_from/to_user inline, because they're often
+ * used in fast paths and have only a small space overhead.
+ */
+static inline
+unsigned long __generic_copy_from_user_nocheck(void *to, const void *from,
+					       unsigned long n)
+{
+	__copy_user_zeroing(to, from, n);
+	return n;
+}
+
+static inline
+unsigned long __generic_copy_to_user_nocheck(void *to, const void *from,
+					     unsigned long n)
+{
+	__copy_user(to, from, n);
+	return n;
+}
+
+
+#if 0
+#error don't use - these macros don't increment to & from pointers
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user(to, from, size)	\
+do {						\
+	asm volatile(				\
+		"       mov %0,a0;\n"		\
+		"0:     movbu (%1),d3;\n"	\
+		"1:     movbu d3,(%2);\n"	\
+		"       add -1,a0;\n"		\
+		"       bne 0b;\n"		\
+		"2:;"				\
+		".section .fixup,\"ax\"\n"	\
+		"3:	jmp 2b\n"		\
+		".previous\n"			\
+		".section __ex_table,\"a\"\n"	\
+		"       .balign 4\n"		\
+		"       .long 0b,3b\n"		\
+		"       .long 1b,3b\n"		\
+		".previous"			\
+		:				\
+		: "d"(size), "d"(to), "d"(from)	\
+		: "d3", "a0");			\
+} while (0)
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user_zeroing(to, from, size)	\
+do {							\
+	asm volatile(					\
+		"       mov %0,a0;\n"			\
+		"0:     movbu (%1),d3;\n"		\
+		"1:     movbu d3,(%2);\n"		\
+		"       add -1,a0;\n"			\
+		"       bne 0b;\n"			\
+		"2:;"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	jmp 2b\n"			\
+		".previous\n"				\
+		".section __ex_table,\"a\"\n"		\
+		"       .balign 4\n"			\
+		"       .long 0b,3b\n"			\
+		"       .long 1b,3b\n"			\
+		".previous"				\
+		:					\
+		: "d"(size), "d"(to), "d"(from)		\
+		: "d3", "a0");				\
+} while (0)
+
+static inline
+unsigned long __constant_copy_to_user(void *to, const void *from,
+				      unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		__constant_copy_user(to, from, n);
+	return n;
+}
+
+static inline
+unsigned long __constant_copy_from_user(void *to, const void *from,
+					unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		__constant_copy_user_zeroing(to, from, n);
+	return n;
+}
+
+static inline
+unsigned long __constant_copy_to_user_nocheck(void *to, const void *from,
+					      unsigned long n)
+{
+	__constant_copy_user(to, from, n);
+	return n;
+}
+
+static inline
+unsigned long __constant_copy_from_user_nocheck(void *to, const void *from,
+						unsigned long n)
+{
+	__constant_copy_user_zeroing(to, from, n);
+	return n;
+}
+#endif
+
+extern unsigned long __generic_copy_to_user(void __user *, const void *,
+					    unsigned long);
+extern unsigned long __generic_copy_from_user(void *, const void __user *,
+					      unsigned long);
+
+#define __copy_to_user_inatomic(to, from, n) \
+	__generic_copy_to_user_nocheck((to), (from), (n))
+#define __copy_from_user_inatomic(to, from, n) \
+	__generic_copy_from_user_nocheck((to), (from), (n))
+
+#define __copy_to_user(to, from, n)			\
+({							\
+	might_sleep();					\
+	__copy_to_user_inatomic((to), (from), (n));	\
+})
+
+#define __copy_from_user(to, from, n)			\
+({							\
+	might_sleep();					\
+	__copy_from_user_inatomic((to), (from), (n));	\
+})
+
+
+#define copy_to_user(to, from, n)   __generic_copy_to_user((to), (from), (n))
+#define copy_from_user(to, from, n) __generic_copy_from_user((to), (from), (n))
+
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern long __strncpy_from_user(char *dst, const char __user *src, long count);
+extern long strnlen_user(const char __user *str, long n);
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+extern unsigned long clear_user(void __user *mem, unsigned long len);
+extern unsigned long __clear_user(void __user *mem, unsigned long len);
+
+#endif /* _ASM_UACCESS_H */
diff --git a/include/asm-mn10300/ucontext.h b/include/asm-mn10300/ucontext.h
new file mode 100644
index 000000000000..fcab5c1d8e18
--- /dev/null
+++ b/include/asm-mn10300/ucontext.h
@@ -0,0 +1,22 @@
+/* MN10300 User context
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UCONTEXT_H
+#define _ASM_UCONTEXT_H
+
+struct ucontext {
+	unsigned long	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	struct sigcontext uc_mcontext;
+	sigset_t	  uc_sigmask;	/* mask last for extensibility */
+};
+
+#endif /* _ASM_UCONTEXT_H */
diff --git a/include/asm-mn10300/unaligned.h b/include/asm-mn10300/unaligned.h
new file mode 100644
index 000000000000..cad3afbd035f
--- /dev/null
+++ b/include/asm-mn10300/unaligned.h
@@ -0,0 +1,136 @@
+/* MN10300 Unaligned memory access handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UNALIGNED_H
+#define _ASM_UNALIGNED_H
+
+#include <asm/types.h>
+
+#if 0
+extern int __bug_unaligned_x(void *ptr);
+
+/*
+ * What is the most efficient way of loading/storing an unaligned value?
+ *
+ * That is the subject of this file.  Efficiency here is defined as
+ * minimum code size with minimum register usage for the common cases.
+ * It is currently not believed that long longs are common, so we
+ * trade efficiency for the chars, shorts and longs against the long
+ * longs.
+ *
+ * Current stats with gcc 2.7.2.2 for these functions:
+ *
+ *	ptrsize	get:	code	regs	put:	code	regs
+ *	1		1	1		1	2
+ *	2		3	2		3	2
+ *	4		7	3		7	3
+ *	8		20	6		16	6
+ *
+ * gcc 2.95.1 seems to code differently:
+ *
+ *	ptrsize	get:	code	regs	put:	code	regs
+ *	1		1	1		1	2
+ *	2		3	2		3	2
+ *	4		7	4		7	4
+ *	8		19	8		15	6
+ *
+ * which may or may not be more efficient (depending upon whether
+ * you can afford the extra registers).  Hopefully the gcc 2.95
+ * is inteligent enough to decide if it is better to use the
+ * extra register, but evidence so far seems to suggest otherwise.
+ *
+ * Unfortunately, gcc is not able to optimise the high word
+ * out of long long >> 32, or the low word from long long << 32
+ */
+
+#define __get_unaligned_2(__p)					\
+	(__p[0] | __p[1] << 8)
+
+#define __get_unaligned_4(__p)					\
+	(__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24)
+
+#define get_unaligned(ptr)					\
+({								\
+	unsigned int __v1, __v2;				\
+	__typeof__(*(ptr)) __v;					\
+	__u8 *__p = (__u8 *)(ptr);				\
+								\
+	switch (sizeof(*(ptr))) {				\
+	case 1:	__v = *(ptr);			break;		\
+	case 2: __v = __get_unaligned_2(__p);	break;		\
+	case 4: __v = __get_unaligned_4(__p);	break;		\
+	case 8:							\
+		__v2 = __get_unaligned_4((__p+4));		\
+		__v1 = __get_unaligned_4(__p);			\
+		__v = ((unsigned long long)__v2 << 32 | __v1);	\
+		break;						\
+	default: __v = __bug_unaligned_x(__p);	break;		\
+	}							\
+	__v;							\
+})
+
+
+static inline void __put_unaligned_2(__u32 __v, register __u8 *__p)
+{
+	*__p++ = __v;
+	*__p++ = __v >> 8;
+}
+
+static inline void __put_unaligned_4(__u32 __v, register __u8 *__p)
+{
+	__put_unaligned_2(__v >> 16, __p + 2);
+	__put_unaligned_2(__v, __p);
+}
+
+static inline void __put_unaligned_8(const unsigned long long __v, __u8 *__p)
+{
+	/*
+	 * tradeoff: 8 bytes of stack for all unaligned puts (2
+	 * instructions), or an extra register in the long long
+	 * case - go for the extra register.
+	 */
+	__put_unaligned_4(__v >> 32, __p + 4);
+	__put_unaligned_4(__v, __p);
+}
+
+/*
+ * Try to store an unaligned value as efficiently as possible.
+ */
+#define put_unaligned(val, ptr)						\
+	({								\
+		switch (sizeof(*(ptr))) {				\
+		case 1:							\
+			*(ptr) = (val);					\
+			break;						\
+		case 2:							\
+			__put_unaligned_2((val), (__u8 *)(ptr));	\
+			break;						\
+		case 4:							\
+			__put_unaligned_4((val), (__u8 *)(ptr));	\
+			break;						\
+		case 8:							\
+			__put_unaligned_8((val), (__u8 *)(ptr));	\
+			break;						\
+		default:						\
+			__bug_unaligned_x(ptr);				\
+			break;						\
+		}							\
+		(void) 0;						\
+	})
+
+
+#else
+
+#define get_unaligned(ptr) (*(ptr))
+#define put_unaligned(val, ptr) ({ *(ptr) = (val); (void) 0; })
+
+#endif
+
+#endif
diff --git a/include/asm-mn10300/unistd.h b/include/asm-mn10300/unistd.h
new file mode 100644
index 000000000000..3721aa9e195d
--- /dev/null
+++ b/include/asm-mn10300/unistd.h
@@ -0,0 +1,384 @@
+/* MN10300 System call number list
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UNISTD_H
+#define _ASM_UNISTD_H
+
+#define __NR_restart_syscall      0
+#define __NR_exit		  1
+#define __NR_fork		  2
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_open		  5
+#define __NR_close		  6
+#define __NR_waitpid		  7
+#define __NR_creat		  8
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_execve		 11
+#define __NR_chdir		 12
+#define __NR_time		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+#define __NR_lchown		 16
+#define __NR_break		 17
+#define __NR_oldstat		 18
+#define __NR_lseek		 19
+#define __NR_getpid		 20
+#define __NR_mount		 21
+#define __NR_umount		 22
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_ptrace		 26
+#define __NR_alarm		 27
+#define __NR_oldfstat		 28
+#define __NR_pause		 29
+#define __NR_utime		 30
+#define __NR_stty		 31
+#define __NR_gtty		 32
+#define __NR_access		 33
+#define __NR_nice		 34
+#define __NR_ftime		 35
+#define __NR_sync		 36
+#define __NR_kill		 37
+#define __NR_rename		 38
+#define __NR_mkdir		 39
+#define __NR_rmdir		 40
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_times		 43
+#define __NR_prof		 44
+#define __NR_brk		 45
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_signal		 48
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_acct		 51
+#define __NR_umount2		 52
+#define __NR_lock		 53
+#define __NR_ioctl		 54
+#define __NR_fcntl		 55
+#define __NR_mpx		 56
+#define __NR_setpgid		 57
+#define __NR_ulimit		 58
+#define __NR_oldolduname	 59
+#define __NR_umask		 60
+#define __NR_chroot		 61
+#define __NR_ustat		 62
+#define __NR_dup2		 63
+#define __NR_getppid		 64
+#define __NR_getpgrp		 65
+#define __NR_setsid		 66
+#define __NR_sigaction		 67
+#define __NR_sgetmask		 68
+#define __NR_ssetmask		 69
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+#define __NR_sigsuspend		 72
+#define __NR_sigpending		 73
+#define __NR_sethostname	 74
+#define __NR_setrlimit		 75
+#define __NR_getrlimit		 76	/* Back compatible 2Gig limited rlimit */
+#define __NR_getrusage		 77
+#define __NR_gettimeofday	 78
+#define __NR_settimeofday	 79
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_select		 82
+#define __NR_symlink		 83
+#define __NR_oldlstat		 84
+#define __NR_readlink		 85
+#define __NR_uselib		 86
+#define __NR_swapon		 87
+#define __NR_reboot		 88
+#define __NR_readdir		 89
+#define __NR_mmap		 90
+#define __NR_munmap		 91
+#define __NR_truncate		 92
+#define __NR_ftruncate		 93
+#define __NR_fchmod		 94
+#define __NR_fchown		 95
+#define __NR_getpriority	 96
+#define __NR_setpriority	 97
+#define __NR_profil		 98
+#define __NR_statfs		 99
+#define __NR_fstatfs		100
+#define __NR_ioperm		101
+#define __NR_socketcall		102
+#define __NR_syslog		103
+#define __NR_setitimer		104
+#define __NR_getitimer		105
+#define __NR_stat		106
+#define __NR_lstat		107
+#define __NR_fstat		108
+#define __NR_olduname		109
+#define __NR_iopl		110
+#define __NR_vhangup		111
+#define __NR_idle		112
+#define __NR_vm86old		113
+#define __NR_wait4		114
+#define __NR_swapoff		115
+#define __NR_sysinfo		116
+#define __NR_ipc		117
+#define __NR_fsync		118
+#define __NR_sigreturn		119
+#define __NR_clone		120
+#define __NR_setdomainname	121
+#define __NR_uname		122
+#define __NR_modify_ldt		123
+#define __NR_adjtimex		124
+#define __NR_mprotect		125
+#define __NR_sigprocmask	126
+#define __NR_create_module	127
+#define __NR_init_module	128
+#define __NR_delete_module	129
+#define __NR_get_kernel_syms	130
+#define __NR_quotactl		131
+#define __NR_getpgid		132
+#define __NR_fchdir		133
+#define __NR_bdflush		134
+#define __NR_sysfs		135
+#define __NR_personality	136
+#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR_getdents		141
+#define __NR__newselect		142
+#define __NR_flock		143
+#define __NR_msync		144
+#define __NR_readv		145
+#define __NR_writev		146
+#define __NR_getsid		147
+#define __NR_fdatasync		148
+#define __NR__sysctl		149
+#define __NR_mlock		150
+#define __NR_munlock		151
+#define __NR_mlockall		152
+#define __NR_munlockall		153
+#define __NR_sched_setparam		154
+#define __NR_sched_getparam		155
+#define __NR_sched_setscheduler		156
+#define __NR_sched_getscheduler		157
+#define __NR_sched_yield		158
+#define __NR_sched_get_priority_max	159
+#define __NR_sched_get_priority_min	160
+#define __NR_sched_rr_get_interval	161
+#define __NR_nanosleep		162
+#define __NR_mremap		163
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+#define __NR_vm86		166
+#define __NR_query_module	167
+#define __NR_poll		168
+#define __NR_nfsservctl		169
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_prctl              172
+#define __NR_rt_sigreturn	173
+#define __NR_rt_sigaction	174
+#define __NR_rt_sigprocmask	175
+#define __NR_rt_sigpending	176
+#define __NR_rt_sigtimedwait	177
+#define __NR_rt_sigqueueinfo	178
+#define __NR_rt_sigsuspend	179
+#define __NR_pread64		180
+#define __NR_pwrite64		181
+#define __NR_chown		182
+#define __NR_getcwd		183
+#define __NR_capget		184
+#define __NR_capset		185
+#define __NR_sigaltstack	186
+#define __NR_sendfile		187
+#define __NR_getpmsg		188	/* some people actually want streams */
+#define __NR_putpmsg		189	/* some people actually want streams */
+#define __NR_vfork		190
+#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_pivot_root		217
+#define __NR_mincore		218
+#define __NR_madvise		219
+#define __NR_madvise1		219	/* delete when C lib stub is removed */
+#define __NR_getdents64		220
+#define __NR_fcntl64		221
+/* 223 is unused */
+#define __NR_gettid		224
+#define __NR_readahead		225
+#define __NR_setxattr		226
+#define __NR_lsetxattr		227
+#define __NR_fsetxattr		228
+#define __NR_getxattr		229
+#define __NR_lgetxattr		230
+#define __NR_fgetxattr		231
+#define __NR_listxattr		232
+#define __NR_llistxattr		233
+#define __NR_flistxattr		234
+#define __NR_removexattr	235
+#define __NR_lremovexattr	236
+#define __NR_fremovexattr	237
+#define __NR_tkill		238
+#define __NR_sendfile64		239
+#define __NR_futex		240
+#define __NR_sched_setaffinity	241
+#define __NR_sched_getaffinity	242
+#define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
+#define __NR_io_setup		245
+#define __NR_io_destroy		246
+#define __NR_io_getevents	247
+#define __NR_io_submit		248
+#define __NR_io_cancel		249
+#define __NR_fadvise64		250
+
+#define __NR_exit_group		252
+#define __NR_lookup_dcookie	253
+#define __NR_epoll_create	254
+#define __NR_epoll_ctl		255
+#define __NR_epoll_wait		256
+#define __NR_remap_file_pages	257
+#define __NR_set_tid_address	258
+#define __NR_timer_create	259
+#define __NR_timer_settime	(__NR_timer_create+1)
+#define __NR_timer_gettime	(__NR_timer_create+2)
+#define __NR_timer_getoverrun	(__NR_timer_create+3)
+#define __NR_timer_delete	(__NR_timer_create+4)
+#define __NR_clock_settime	(__NR_timer_create+5)
+#define __NR_clock_gettime	(__NR_timer_create+6)
+#define __NR_clock_getres	(__NR_timer_create+7)
+#define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_statfs64		268
+#define __NR_fstatfs64		269
+#define __NR_tgkill		270
+#define __NR_utimes		271
+#define __NR_fadvise64_64	272
+#define __NR_vserver		273
+#define __NR_mbind		274
+#define __NR_get_mempolicy	275
+#define __NR_set_mempolicy	276
+#define __NR_mq_open 		277
+#define __NR_mq_unlink		(__NR_mq_open+1)
+#define __NR_mq_timedsend	(__NR_mq_open+2)
+#define __NR_mq_timedreceive	(__NR_mq_open+3)
+#define __NR_mq_notify		(__NR_mq_open+4)
+#define __NR_mq_getsetattr	(__NR_mq_open+5)
+#define __NR_kexec_load		283
+#define __NR_waitid		284
+#define __NR_add_key		286
+#define __NR_request_key	287
+#define __NR_keyctl		288
+#define __NR_cacheflush		289
+#define __NR_ioprio_set		290
+#define __NR_ioprio_get		291
+#define __NR_inotify_init	292
+#define __NR_inotify_add_watch	293
+#define __NR_inotify_rm_watch	294
+#define __NR_migrate_pages	295
+#define __NR_openat		296
+#define __NR_mkdirat		297
+#define __NR_mknodat		298
+#define __NR_fchownat		299
+#define __NR_futimesat		300
+#define __NR_fstatat64		301
+#define __NR_unlinkat		302
+#define __NR_renameat		303
+#define __NR_linkat		304
+#define __NR_symlinkat		305
+#define __NR_readlinkat		306
+#define __NR_fchmodat		307
+#define __NR_faccessat		308
+#define __NR_pselect6		309
+#define __NR_ppoll		310
+#define __NR_unshare		311
+#define __NR_set_robust_list	312
+#define __NR_get_robust_list	313
+#define __NR_splice		314
+#define __NR_sync_file_range	315
+#define __NR_tee		316
+#define __NR_vmsplice		317
+#define __NR_move_pages		318
+#define __NR_getcpu		319
+#define __NR_epoll_pwait	320
+#define __NR_utimensat		321
+#define __NR_signalfd		322
+#define __NR_timerfd_create	323
+#define __NR_eventfd		324
+#define __NR_fallocate		325
+#define __NR_timerfd_settime	326
+#define __NR_timerfd_gettime	327
+
+#ifdef __KERNEL__
+
+#define NR_syscalls 326
+
+/*
+ * specify the deprecated syscalls we want to support on this arch
+ */
+#define __ARCH_WANT_IPC_PARSE_VERSION
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_OLD_STAT
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SGETMASK
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_WAITPID
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+#ifndef cond_syscall
+#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall");
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_UNISTD_H */
diff --git a/include/asm-mn10300/unit-asb2303/clock.h b/include/asm-mn10300/unit-asb2303/clock.h
new file mode 100644
index 000000000000..8b450e920af1
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2303/clock.h
@@ -0,0 +1,45 @@
+/* ASB2303-specific clocks
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_UNIT_CLOCK_H
+#define _ASM_UNIT_CLOCK_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_MN10300_RTC
+
+extern unsigned long mn10300_ioclk;	/* IOCLK (crystal speed) in HZ */
+extern unsigned long mn10300_iobclk;
+extern unsigned long mn10300_tsc_per_HZ;
+
+#define MN10300_IOCLK		((unsigned long)mn10300_ioclk)
+/* If this processors has a another clock, uncomment the below. */
+/* #define MN10300_IOBCLK	((unsigned long)mn10300_iobclk) */
+
+#else /* !CONFIG_MN10300_RTC */
+
+#define MN10300_IOCLK		33333333UL
+/* #define MN10300_IOBCLK	66666666UL */
+
+#endif /* !CONFIG_MN10300_RTC */
+
+#define MN10300_JCCLK		MN10300_IOCLK
+#define MN10300_TSCCLK		MN10300_IOCLK
+
+#ifdef CONFIG_MN10300_RTC
+#define MN10300_TSC_PER_HZ	((unsigned long)mn10300_tsc_per_HZ)
+#else /* !CONFIG_MN10300_RTC */
+#define MN10300_TSC_PER_HZ	(MN10300_TSCCLK/HZ)
+#endif /* !CONFIG_MN10300_RTC */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_UNIT_CLOCK_H */
diff --git a/include/asm-mn10300/unit-asb2303/leds.h b/include/asm-mn10300/unit-asb2303/leds.h
new file mode 100644
index 000000000000..3a7543ea7b5c
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2303/leds.h
@@ -0,0 +1,43 @@
+/* ASB2303-specific LEDs
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_UNIT_LEDS_H
+#define _ASM_UNIT_LEDS_H
+
+#include <asm/pio-regs.h>
+#include <asm/cpu-regs.h>
+#include <asm/exceptions.h>
+
+#define ASB2303_GPIO0DEF	__SYSREG(0xDB000000, u32)
+#define ASB2303_7SEGLEDS	__SYSREG(0xDB000008, u32)
+
+/*
+ * use the 7-segment LEDs to indicate states
+ */
+
+/* flip the 7-segment LEDs between "G" and "-" */
+#define mn10300_set_gdbleds(ONOFF)			\
+do {							\
+	ASB2303_7SEGLEDS = (ONOFF) ? 0x85 : 0x7f;	\
+} while (0)
+
+/* indicate double-fault by displaying "d" on the LEDs */
+#define mn10300_set_dbfleds			\
+	mov	0x43,d0			;	\
+	movbu	d0,(ASB2303_7SEGLEDS)
+
+#ifndef __ASSEMBLY__
+extern void peripheral_leds_display_exception(enum exception_code code);
+extern void peripheral_leds_led_chase(void);
+extern void debug_to_serial(const char *p, int n);
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_UNIT_LEDS_H */
diff --git a/include/asm-mn10300/unit-asb2303/serial.h b/include/asm-mn10300/unit-asb2303/serial.h
new file mode 100644
index 000000000000..0d55cf5896ac
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2303/serial.h
@@ -0,0 +1,136 @@
+/* ASB2303-specific 8250 serial ports
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_UNIT_SERIAL_H
+#define _ASM_UNIT_SERIAL_H
+
+#include <asm/cpu-regs.h>
+#include <asm/proc/irq.h>
+#include <linux/serial_reg.h>
+
+#define SERIAL_PORT0_BASE_ADDRESS	0xA6FB0000
+#define SERIAL_PORT1_BASE_ADDRESS	0xA6FC0000
+
+#define SERIAL_IRQ	XIRQ0	/* Dual serial (PC16552)	(Hi) */
+
+/*
+ * dispose of the /dev/ttyS0 and /dev/ttyS1 serial ports
+ */
+#ifndef CONFIG_GDBSTUB_ON_TTYSx
+
+#define SERIAL_PORT_DFNS						\
+	{								\
+	.baud_base		= BASE_BAUD,				\
+	.irq			= SERIAL_IRQ,				\
+	.flags			= STD_COM_FLAGS,			\
+	.iomem_base		= (u8 *) SERIAL_PORT0_BASE_ADDRESS,	\
+	.iomem_reg_shift	= 2,					\
+	.io_type		= SERIAL_IO_MEM,			\
+	},								\
+	{								\
+	.baud_base		= BASE_BAUD,				\
+	.irq			= SERIAL_IRQ,				\
+	.flags			= STD_COM_FLAGS,			\
+	.iomem_base		= (u8 *) SERIAL_PORT1_BASE_ADDRESS,	\
+	.iomem_reg_shift	= 2,					\
+	.io_type		= SERIAL_IO_MEM,			\
+	},
+
+#ifndef __ASSEMBLY__
+
+static inline void __debug_to_serial(const char *p, int n)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#else /* CONFIG_GDBSTUB_ON_TTYSx */
+
+#define SERIAL_PORT_DFNS /* both stolen by gdb-stub because they share an IRQ */
+
+#if defined(CONFIG_GDBSTUB_ON_TTYS0)
+#define GDBPORT_SERIAL_RX	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_RX  * 4, u8)
+#define GDBPORT_SERIAL_TX	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_TX  * 4, u8)
+#define GDBPORT_SERIAL_DLL	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_DLL * 4, u8)
+#define GDBPORT_SERIAL_DLM	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_DLM * 4, u8)
+#define GDBPORT_SERIAL_IER	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_IER * 4, u8)
+#define GDBPORT_SERIAL_IIR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_IIR * 4, u8)
+#define GDBPORT_SERIAL_FCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_FCR * 4, u8)
+#define GDBPORT_SERIAL_LCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_LCR * 4, u8)
+#define GDBPORT_SERIAL_MCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_MCR * 4, u8)
+#define GDBPORT_SERIAL_LSR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_LSR * 4, u8)
+#define GDBPORT_SERIAL_MSR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_MSR * 4, u8)
+#define GDBPORT_SERIAL_SCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_SCR * 4, u8)
+#define GDBPORT_SERIAL_IRQ	SERIAL_IRQ
+
+#elif defined(CONFIG_GDBSTUB_ON_TTYS1)
+#define GDBPORT_SERIAL_RX	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_RX  * 4, u8)
+#define GDBPORT_SERIAL_TX	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_TX  * 4, u8)
+#define GDBPORT_SERIAL_DLL	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_DLL * 4, u8)
+#define GDBPORT_SERIAL_DLM	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_DLM * 4, u8)
+#define GDBPORT_SERIAL_IER	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_IER * 4, u8)
+#define GDBPORT_SERIAL_IIR	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_IIR * 4, u8)
+#define GDBPORT_SERIAL_FCR	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_FCR * 4, u8)
+#define GDBPORT_SERIAL_LCR	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_LCR * 4, u8)
+#define GDBPORT_SERIAL_MCR	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_MCR * 4, u8)
+#define GDBPORT_SERIAL_LSR	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_LSR * 4, u8)
+#define GDBPORT_SERIAL_MSR	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_MSR * 4, u8)
+#define GDBPORT_SERIAL_SCR	__SYSREG(SERIAL_PORT1_BASE_ADDRESS + UART_SCR * 4, u8)
+#define GDBPORT_SERIAL_IRQ	SERIAL_IRQ
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define LSR_WAIT_FOR(STATE)					\
+do {								\
+	while (!(GDBPORT_SERIAL_LSR & UART_LSR_##STATE)) {}	\
+} while (0)
+#define FLOWCTL_WAIT_FOR(LINE)					\
+do {								\
+	while (!(GDBPORT_SERIAL_MSR & UART_MSR_##LINE)) {}	\
+} while (0)
+#define FLOWCTL_CLEAR(LINE)			\
+do {						\
+	GDBPORT_SERIAL_MCR &= ~UART_MCR_##LINE;	\
+} while (0)
+#define FLOWCTL_SET(LINE)			\
+do {						\
+	GDBPORT_SERIAL_MCR |= UART_MCR_##LINE;	\
+} while (0)
+#define FLOWCTL_QUERY(LINE)	({ GDBPORT_SERIAL_MSR & UART_MSR_##LINE; })
+
+static inline void __debug_to_serial(const char *p, int n)
+{
+	char ch;
+
+	FLOWCTL_SET(DTR);
+
+	for (; n > 0; n--) {
+		LSR_WAIT_FOR(THRE);
+		FLOWCTL_WAIT_FOR(CTS);
+
+		ch = *p++;
+		if (ch == 0x0a) {
+			GDBPORT_SERIAL_TX = 0x0d;
+			LSR_WAIT_FOR(THRE);
+			FLOWCTL_WAIT_FOR(CTS);
+		}
+		GDBPORT_SERIAL_TX = ch;
+	}
+
+	FLOWCTL_CLEAR(DTR);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_GDBSTUB_ON_TTYSx */
+
+#endif /* _ASM_UNIT_SERIAL_H */
diff --git a/include/asm-mn10300/unit-asb2303/smc91111.h b/include/asm-mn10300/unit-asb2303/smc91111.h
new file mode 100644
index 000000000000..dd456e9c513f
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2303/smc91111.h
@@ -0,0 +1,50 @@
+/* Support for the SMC91C111 NIC on an ASB2303
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UNIT_SMC91111_H
+#define _ASM_UNIT_SMC91111_H
+
+#include <asm/intctl-regs.h>
+
+#define SMC91111_BASE		0xAA000300UL
+#define SMC91111_BASE_END	0xAA000400UL
+#define SMC91111_IRQ		XIRQ3
+
+#define SMC_CAN_USE_8BIT	0
+#define SMC_CAN_USE_16BIT	1
+#define SMC_CAN_USE_32BIT	0
+#define SMC_NOWAIT		1
+#define SMC_IRQ_FLAGS		(0)
+
+#if SMC_CAN_USE_8BIT
+#define SMC_inb(a, r)		inb((unsigned long) ((a) + (r)))
+#define SMC_outb(v, a, r)	outb(v, (unsigned long) ((a) + (r)))
+#endif
+
+#if SMC_CAN_USE_16BIT
+#define SMC_inw(a, r)		inw((unsigned long) ((a) + (r)))
+#define SMC_outw(v, a, r)	outw(v, (unsigned long) ((a) + (r)))
+#define SMC_insw(a, r, p, l)	insw((unsigned long) ((a) + (r)), (p), (l))
+#define SMC_outsw(a, r, p, l)	outsw((unsigned long) ((a) + (r)), (p), (l))
+#endif
+
+#if SMC_CAN_USE_32BIT
+#define SMC_inl(a, r)		inl((unsigned long) ((a) + (r)))
+#define SMC_outl(v, a, r)	outl(v, (unsigned long) ((a) + (r)))
+#define SMC_insl(a, r, p, l)	insl((unsigned long) ((a) + (r)), (p), (l))
+#define SMC_outsl(a, r, p, l)	outsl((unsigned long) ((a) + (r)), (p), (l))
+#endif
+
+#define RPC_LSA_DEFAULT		RPC_LED_100_10
+#define RPC_LSB_DEFAULT		RPC_LED_TX_RX
+
+#define set_irq_type(irq, type)
+
+#endif /*  _ASM_UNIT_SMC91111_H */
diff --git a/include/asm-mn10300/unit-asb2303/timex.h b/include/asm-mn10300/unit-asb2303/timex.h
new file mode 100644
index 000000000000..7e54b0cfdd03
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2303/timex.h
@@ -0,0 +1,135 @@
+/* ASB2303-specific timer specifcations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UNIT_TIMEX_H
+#define _ASM_UNIT_TIMEX_H
+
+#ifndef __ASSEMBLY__
+#include <linux/irq.h>
+#endif /* __ASSEMBLY__ */
+
+#include <asm/timer-regs.h>
+#include <asm/unit/clock.h>
+
+/*
+ * jiffies counter specifications
+ */
+
+#define	TMJCBR_MAX		0xffff
+#define	TMJCBC			TM01BC
+
+#define	TMJCMD			TM01MD
+#define	TMJCBR			TM01BR
+#define	TMJCIRQ			TM1IRQ
+#define	TMJCICR			TM1ICR
+#define	TMJCICR_LEVEL		GxICR_LEVEL_5
+
+#ifndef __ASSEMBLY__
+
+static inline void startup_jiffies_counter(void)
+{
+	unsigned rate;
+	u16 md, t16;
+
+	/* use as little prescaling as possible to avoid losing accuracy */
+	md = TM0MD_SRC_IOCLK;
+	rate = MN10300_JCCLK / HZ;
+
+	if (rate > TMJCBR_MAX) {
+		md = TM0MD_SRC_IOCLK_8;
+		rate = MN10300_JCCLK / 8 / HZ;
+
+		if (rate > TMJCBR_MAX) {
+			md = TM0MD_SRC_IOCLK_32;
+			rate = MN10300_JCCLK / 32 / HZ;
+
+			if (rate > TMJCBR_MAX)
+				BUG();
+		}
+	}
+
+	TMJCBR = rate - 1;
+	t16 = TMJCBR;
+
+	TMJCMD =
+		md |
+		TM1MD_SRC_TM0CASCADE << 8 |
+		TM0MD_INIT_COUNTER |
+		TM1MD_INIT_COUNTER << 8;
+
+	TMJCMD =
+		md |
+		TM1MD_SRC_TM0CASCADE << 8 |
+		TM0MD_COUNT_ENABLE |
+		TM1MD_COUNT_ENABLE << 8;
+
+	t16 = TMJCMD;
+
+	TMJCICR |= GxICR_ENABLE | GxICR_DETECT | GxICR_REQUEST;
+	t16 = TMJCICR;
+}
+
+static inline void shutdown_jiffies_counter(void)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+
+/*
+ * timestamp counter specifications
+ */
+
+#define	TMTSCBR_MAX		0xffffffff
+#define	TMTSCBC			TM45BC
+
+#ifndef __ASSEMBLY__
+
+static inline void startup_timestamp_counter(void)
+{
+	/* set up timer 4 & 5 cascaded as a 32-bit counter to count real time
+	 * - count down from 4Gig-1 to 0 and wrap at IOCLK rate
+	 */
+	TM45BR = TMTSCBR_MAX;
+
+	TM4MD = TM4MD_SRC_IOCLK;
+	TM4MD |= TM4MD_INIT_COUNTER;
+	TM4MD &= ~TM4MD_INIT_COUNTER;
+	TM4ICR = 0;
+
+	TM5MD = TM5MD_SRC_TM4CASCADE;
+	TM5MD |= TM5MD_INIT_COUNTER;
+	TM5MD &= ~TM5MD_INIT_COUNTER;
+	TM5ICR = 0;
+
+	TM5MD |= TM5MD_COUNT_ENABLE;
+	TM4MD |= TM4MD_COUNT_ENABLE;
+}
+
+static inline void shutdown_timestamp_counter(void)
+{
+	TM4MD = 0;
+	TM5MD = 0;
+}
+
+/*
+ * we use a cascaded pair of 16-bit down-counting timers to count I/O
+ * clock cycles for the purposes of time keeping
+ */
+typedef unsigned long cycles_t;
+
+static inline cycles_t read_timestamp_counter(void)
+{
+	return (cycles_t)TMTSCBC;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_UNIT_TIMEX_H */
diff --git a/include/asm-mn10300/unit-asb2305/clock.h b/include/asm-mn10300/unit-asb2305/clock.h
new file mode 100644
index 000000000000..7d514841ffda
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2305/clock.h
@@ -0,0 +1,45 @@
+/* ASB2305-specific clocks
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_UNIT_CLOCK_H
+#define _ASM_UNIT_CLOCK_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_MN10300_RTC
+
+extern unsigned long mn10300_ioclk;	/* IOCLK (crystal speed) in HZ */
+extern unsigned long mn10300_iobclk;
+extern unsigned long mn10300_tsc_per_HZ;
+
+#define MN10300_IOCLK		((unsigned long)mn10300_ioclk)
+/* If this processors has a another clock, uncomment the below. */
+/* #define MN10300_IOBCLK	((unsigned long)mn10300_iobclk) */
+
+#else /* !CONFIG_MN10300_RTC */
+
+#define MN10300_IOCLK		33333333UL
+/* #define MN10300_IOBCLK	66666666UL */
+
+#endif /* !CONFIG_MN10300_RTC */
+
+#define MN10300_JCCLK		MN10300_IOCLK
+#define MN10300_TSCCLK		MN10300_IOCLK
+
+#ifdef CONFIG_MN10300_RTC
+#define MN10300_TSC_PER_HZ	((unsigned long)mn10300_tsc_per_HZ)
+#else /* !CONFIG_MN10300_RTC */
+#define MN10300_TSC_PER_HZ	(MN10300_TSCCLK/HZ)
+#endif /* !CONFIG_MN10300_RTC */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_UNIT_CLOCK_H */
diff --git a/include/asm-mn10300/unit-asb2305/leds.h b/include/asm-mn10300/unit-asb2305/leds.h
new file mode 100644
index 000000000000..bc471f617fd1
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2305/leds.h
@@ -0,0 +1,51 @@
+/* ASB2305-specific LEDs
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_UNIT_LEDS_H
+#define _ASM_UNIT_LEDS_H
+
+#include <asm/pio-regs.h>
+#include <asm/cpu-regs.h>
+#include <asm/exceptions.h>
+
+#define ASB2305_7SEGLEDS	__SYSREG(0xA6F90000, u32)
+
+/* perform a hard reset by driving PIO06 low */
+#define mn10300_unit_hard_reset()		\
+do {						\
+	P0OUT &= 0xbf;				\
+	P0MD = (P0MD & P0MD_6) | P0MD_6_OUT;	\
+} while (0)
+
+/*
+ * use the 7-segment LEDs to indicate states
+ */
+/* indicate double-fault by displaying "db-f" on the LEDs */
+#define mn10300_set_dbfleds			\
+	mov	0x43077f1d,d0		;	\
+	mov	d0,(ASB2305_7SEGLEDS)
+
+/* flip the 7-segment LEDs between "Gdb-" and "----" */
+#define mn10300_set_gdbleds(ONOFF)				\
+do {								\
+	ASB2305_7SEGLEDS = (ONOFF) ? 0x8543077f : 0x7f7f7f7f;	\
+} while (0)
+
+#ifndef __ASSEMBLY__
+extern void peripheral_leds_display_exception(enum exception_code);
+extern void peripheral_leds_led_chase(void);
+extern void peripheral_leds7x4_display_dec(unsigned int, unsigned int);
+extern void peripheral_leds7x4_display_hex(unsigned int, unsigned int);
+extern void peripheral_leds7x4_display_minssecs(unsigned int, unsigned int);
+extern void peripheral_leds7x4_display_rtc(void);
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_UNIT_LEDS_H */
diff --git a/include/asm-mn10300/unit-asb2305/serial.h b/include/asm-mn10300/unit-asb2305/serial.h
new file mode 100644
index 000000000000..73d31d67bb71
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2305/serial.h
@@ -0,0 +1,120 @@
+/* ASB2305-specific 8250 serial ports
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UNIT_SERIAL_H
+#define _ASM_UNIT_SERIAL_H
+
+#include <asm/cpu/cpu-regs.h>
+#include <asm/proc/irq.h>
+#include <linux/serial_reg.h>
+
+#define SERIAL_PORT0_BASE_ADDRESS	0xA6FB0000
+#define ASB2305_DEBUG_MCR	__SYSREG(0xA6FB0000 + UART_MCR * 2, u8)
+
+#define SERIAL_IRQ	XIRQ0	/* Dual serial (PC16552)	(Hi) */
+
+/*
+ * dispose of the /dev/ttyS0 serial port
+ */
+#ifndef CONFIG_GDBSTUB_ON_TTYSx
+
+#define SERIAL_PORT_DFNS						\
+	{								\
+	.baud_base		= BASE_BAUD,				\
+	.irq			= SERIAL_IRQ,				\
+	.flags			= STD_COM_FLAGS,			\
+	.iomem_base		= (u8 *) SERIAL_PORT0_BASE_ADDRESS,	\
+	.iomem_reg_shift	= 2,					\
+	.io_type		= SERIAL_IO_MEM,			\
+	},
+
+#ifndef __ASSEMBLY__
+
+static inline void __debug_to_serial(const char *p, int n)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#else /* CONFIG_GDBSTUB_ON_TTYSx */
+
+#define SERIAL_PORT_DFNS /* stolen by gdb-stub */
+
+#if defined(CONFIG_GDBSTUB_ON_TTYS0)
+#define GDBPORT_SERIAL_RX	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_RX  * 4, u8)
+#define GDBPORT_SERIAL_TX	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_TX  * 4, u8)
+#define GDBPORT_SERIAL_DLL	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_DLL * 4, u8)
+#define GDBPORT_SERIAL_DLM	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_DLM * 4, u8)
+#define GDBPORT_SERIAL_IER	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_IER * 4, u8)
+#define GDBPORT_SERIAL_IIR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_IIR * 4, u8)
+#define GDBPORT_SERIAL_FCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_FCR * 4, u8)
+#define GDBPORT_SERIAL_LCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_LCR * 4, u8)
+#define GDBPORT_SERIAL_MCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_MCR * 4, u8)
+#define GDBPORT_SERIAL_LSR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_LSR * 4, u8)
+#define GDBPORT_SERIAL_MSR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_MSR * 4, u8)
+#define GDBPORT_SERIAL_SCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_SCR * 4, u8)
+#define GDBPORT_SERIAL_IRQ	SERIAL_IRQ
+
+#elif defined(CONFIG_GDBSTUB_ON_TTYS1)
+#error The ASB2305 doesnt have a /dev/ttyS1
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define TTYS0_TX	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_TX  * 4, u8)
+#define TTYS0_MCR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_MCR * 4, u8)
+#define TTYS0_LSR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_LSR * 4, u8)
+#define TTYS0_MSR	__SYSREG(SERIAL_PORT0_BASE_ADDRESS + UART_MSR * 4, u8)
+
+#define LSR_WAIT_FOR(STATE)				\
+do {							\
+	while (!(TTYS0_LSR & UART_LSR_##STATE)) {}	\
+} while (0)
+#define FLOWCTL_WAIT_FOR(LINE)				\
+do {							\
+	while (!(TTYS0_MSR & UART_MSR_##LINE)) {}	\
+} while (0)
+#define FLOWCTL_CLEAR(LINE)			\
+do {						\
+	TTYS0_MCR &= ~UART_MCR_##LINE;		\
+} while (0)
+#define FLOWCTL_SET(LINE)			\
+do {						\
+	TTYS0_MCR |= UART_MCR_##LINE;		\
+} while (0)
+#define FLOWCTL_QUERY(LINE)	({ TTYS0_MSR & UART_MSR_##LINE; })
+
+static inline void __debug_to_serial(const char *p, int n)
+{
+	char ch;
+
+	FLOWCTL_SET(DTR);
+
+	for (; n > 0; n--) {
+		LSR_WAIT_FOR(THRE);
+		FLOWCTL_WAIT_FOR(CTS);
+
+		ch = *p++;
+		if (ch == 0x0a) {
+			TTYS0_TX = 0x0d;
+			LSR_WAIT_FOR(THRE);
+			FLOWCTL_WAIT_FOR(CTS);
+		}
+		TTYS0_TX = ch;
+	}
+
+	FLOWCTL_CLEAR(DTR);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_GDBSTUB_ON_TTYSx */
+
+#endif /* _ASM_UNIT_SERIAL_H */
diff --git a/include/asm-mn10300/unit-asb2305/timex.h b/include/asm-mn10300/unit-asb2305/timex.h
new file mode 100644
index 000000000000..10e1bfe34463
--- /dev/null
+++ b/include/asm-mn10300/unit-asb2305/timex.h
@@ -0,0 +1,135 @@
+/* ASB2305 timer specifcations
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_UNIT_TIMEX_H
+#define _ASM_UNIT_TIMEX_H
+
+#ifndef __ASSEMBLY__
+#include <linux/irq.h>
+#endif /* __ASSEMBLY__ */
+
+#include <asm/cpu/timer-regs.h>
+#include <asm/unit/clock.h>
+
+/*
+ * jiffies counter specifications
+ */
+
+#define	TMJCBR_MAX		0xffff
+#define	TMJCBC			TM01BC
+
+#define	TMJCMD			TM01MD
+#define	TMJCBR			TM01BR
+#define	TMJCIRQ			TM1IRQ
+#define	TMJCICR			TM1ICR
+#define	TMJCICR_LEVEL		GxICR_LEVEL_5
+
+#ifndef __ASSEMBLY__
+
+static inline void startup_jiffies_counter(void)
+{
+	unsigned rate;
+	u16 md, t16;
+
+	/* use as little prescaling as possible to avoid losing accuracy */
+	md = TM0MD_SRC_IOCLK;
+	rate = MN10300_JCCLK / HZ;
+
+	if (rate > TMJCBR_MAX) {
+		md = TM0MD_SRC_IOCLK_8;
+		rate = MN10300_JCCLK / 8 / HZ;
+
+		if (rate > TMJCBR_MAX) {
+			md = TM0MD_SRC_IOCLK_32;
+			rate = MN10300_JCCLK / 32 / HZ;
+
+			if (rate > TMJCBR_MAX)
+				BUG();
+		}
+	}
+
+	TMJCBR = rate - 1;
+	t16 = TMJCBR;
+
+	TMJCMD =
+		md |
+		TM1MD_SRC_TM0CASCADE << 8 |
+		TM0MD_INIT_COUNTER |
+		TM1MD_INIT_COUNTER << 8;
+
+	TMJCMD =
+		md |
+		TM1MD_SRC_TM0CASCADE << 8 |
+		TM0MD_COUNT_ENABLE |
+		TM1MD_COUNT_ENABLE << 8;
+
+	t16 = TMJCMD;
+
+	TMJCICR |= GxICR_ENABLE | GxICR_DETECT | GxICR_REQUEST;
+	t16 = TMJCICR;
+}
+
+static inline void shutdown_jiffies_counter(void)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+
+/*
+ * timestamp counter specifications
+ */
+
+#define	TMTSCBR_MAX		0xffffffff
+#define	TMTSCBC			TM45BC
+
+#ifndef __ASSEMBLY__
+
+static inline void startup_timestamp_counter(void)
+{
+	/* set up timer 4 & 5 cascaded as a 32-bit counter to count real time
+	 * - count down from 4Gig-1 to 0 and wrap at IOCLK rate
+	 */
+	TM45BR = TMTSCBR_MAX;
+
+	TM4MD = TM4MD_SRC_IOCLK;
+	TM4MD |= TM4MD_INIT_COUNTER;
+	TM4MD &= ~TM4MD_INIT_COUNTER;
+	TM4ICR = 0;
+
+	TM5MD = TM5MD_SRC_TM4CASCADE;
+	TM5MD |= TM5MD_INIT_COUNTER;
+	TM5MD &= ~TM5MD_INIT_COUNTER;
+	TM5ICR = 0;
+
+	TM5MD |= TM5MD_COUNT_ENABLE;
+	TM4MD |= TM4MD_COUNT_ENABLE;
+}
+
+static inline void shutdown_timestamp_counter(void)
+{
+	TM4MD = 0;
+	TM5MD = 0;
+}
+
+/*
+ * we use a cascaded pair of 16-bit down-counting timers to count I/O
+ * clock cycles for the purposes of time keeping
+ */
+typedef unsigned long cycles_t;
+
+static inline cycles_t read_timestamp_counter(void)
+{
+	return (cycles_t) TMTSCBC;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_UNIT_TIMEX_H */
diff --git a/include/asm-mn10300/user.h b/include/asm-mn10300/user.h
new file mode 100644
index 000000000000..e1193908b78c
--- /dev/null
+++ b/include/asm-mn10300/user.h
@@ -0,0 +1,53 @@
+/* MN10300 User process data
+ *
+ * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef _ASM_USER_H
+#define _ASM_USER_H
+
+#include <asm/page.h>
+#include <linux/ptrace.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * When the kernel dumps core, it starts by dumping the user struct - this will
+ * be used by gdb to figure out where the data and stack segments are within
+ * the file, and what virtual addresses to use.
+ */
+struct user {
+	/* We start with the registers, to mimic the way that "memory" is
+	 * returned from the ptrace(3,...) function.
+	 */
+	struct pt_regs regs;		/* Where the registers are actually stored */
+
+	/* The rest of this junk is to help gdb figure out what goes where */
+	unsigned long int u_tsize;	/* Text segment size (pages). */
+	unsigned long int u_dsize;	/* Data segment size (pages). */
+	unsigned long int u_ssize;	/* Stack segment size (pages). */
+	unsigned long start_code;	/* Starting virtual address of text. */
+	unsigned long start_stack;	/* Starting virtual address of stack area.
+					   This is actually the bottom of the stack,
+					   the top of the stack is always found in the
+					   esp register.  */
+	long int signal;		/* Signal that caused the core dump. */
+	int reserved;			/* No longer used */
+	struct user_pt_regs *u_ar0;	/* Used by gdb to help find the values for */
+
+	/* the registers */
+	unsigned long magic;		/* To uniquely identify a core file */
+	char u_comm[32];		/* User command that was responsible */
+};
+#endif
+
+#define NBPG PAGE_SIZE
+#define UPAGES 1
+#define HOST_TEXT_START_ADDR	+(u.start_code)
+#define HOST_STACK_END_ADDR	+(u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _ASM_USER_H */
diff --git a/include/asm-mn10300/vga.h b/include/asm-mn10300/vga.h
new file mode 100644
index 000000000000..0163e50a3459
--- /dev/null
+++ b/include/asm-mn10300/vga.h
@@ -0,0 +1,17 @@
+/* MN10300 VGA register definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_VGA_H
+#define _ASM_VGA_H
+
+
+
+#endif /* _ASM_VGA_H */
diff --git a/include/asm-mn10300/xor.h b/include/asm-mn10300/xor.h
new file mode 100644
index 000000000000..c82eb12a5b18
--- /dev/null
+++ b/include/asm-mn10300/xor.h
@@ -0,0 +1 @@
+#include <asm-generic/xor.h>
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 5834e843a946..18bea78fe47b 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
 #define EM_V850		87	/* NEC v850 */
 #define EM_M32R		88	/* Renesas M32R */
 #define EM_H8_300	46	/* Renesas H8/300,300H,H8S */
+#define EM_MN10300	89	/* Panasonic/MEI MN10300, AM33 */
 #define EM_BLACKFIN     106     /* ADI Blackfin Processor */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
 #define EM_AVR32	0x18ad	/* Atmel AVR32 */
@@ -47,6 +48,8 @@
 #define EM_CYGNUS_M32R	0x9041
 /* This is the old interim value for S/390 architecture */
 #define EM_S390_OLD	0xA390
+/* Also Panasonic/MEI MN10300, AM33 */
+#define EM_CYGNUS_MN10300 0xbeef
 
 
 #endif /* _LINUX_ELF_EM_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 568042290c0b..3344185dd3b2 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -222,6 +222,7 @@ extern int panic_on_unrecovered_nmi;
 extern int tainted;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned);
+extern int root_mountflags;
 
 /* Values used for system_state */
 extern enum system_states {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4f4008fc73e4..ce0bb2600c25 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -404,7 +404,8 @@ config DEBUG_HIGHMEM
 config DEBUG_BUGVERBOSE
 	bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
 	depends on BUG
-	depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || FRV || SUPERH || GENERIC_BUG || BLACKFIN
+	depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \
+		   FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300
 	default !EMBEDDED
 	help
 	  Say Y here to make BUG() panics output the file name and line number
@@ -454,7 +455,9 @@ config DEBUG_SG
 
 config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
-	depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32 || SUPERH || BLACKFIN)
+	depends on DEBUG_KERNEL && \
+		(X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || \
+		 AVR32 || SUPERH || BLACKFIN || MN10300)
 	default y if DEBUG_INFO && UML
 	help
 	  If you say Y here the resulting kernel image will be slightly larger
-- 
cgit 


From ec7015840ad7a8cdc87f52367ffe9c0b0401d919 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 8 Feb 2008 04:19:55 -0800
Subject: Remove fastcall from linux/include

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/irq.h      | 16 ++++++++--------
 include/linux/kernel.h   |  2 +-
 include/linux/mutex.h    | 12 ++++++------
 include/linux/preempt.h  |  4 ++--
 include/linux/spinlock.h |  2 +-
 include/linux/timer.h    |  4 ++--
 6 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4669be080617..a19b381d4112 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -25,7 +25,7 @@
 #include <asm/irq_regs.h>
 
 struct irq_desc;
-typedef	void fastcall (*irq_flow_handler_t)(unsigned int irq,
+typedef	void (*irq_flow_handler_t)(unsigned int irq,
 					    struct irq_desc *desc);
 
 
@@ -276,19 +276,19 @@ extern int handle_IRQ_event(unsigned int irq, struct irqaction *action);
  * Built-in IRQ handlers for various IRQ types,
  * callable via desc->chip->handle_irq()
  */
-extern void fastcall handle_level_irq(unsigned int irq, struct irq_desc *desc);
-extern void fastcall handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc);
-extern void fastcall handle_edge_irq(unsigned int irq, struct irq_desc *desc);
-extern void fastcall handle_simple_irq(unsigned int irq, struct irq_desc *desc);
-extern void fastcall handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
-extern void fastcall handle_bad_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_level_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
 
 /*
  * Monolithic do_IRQ implementation.
  * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
  */
 #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
-extern fastcall unsigned int __do_IRQ(unsigned int irq);
+extern unsigned int __do_IRQ(unsigned int irq);
 #endif
 
 /*
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3344185dd3b2..7d7519628dfa 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -133,7 +133,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 extern void oops_enter(void);
 extern void oops_exit(void);
 extern int oops_may_print(void);
-fastcall NORET_TYPE void do_exit(long error_code)
+NORET_TYPE void do_exit(long error_code)
 	ATTRIB_NORET;
 NORET_TYPE void complete_and_exit(struct completion *, long)
 	ATTRIB_NORET;
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 05c590352dd7..bc6da10ceee0 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -112,7 +112,7 @@ extern void __mutex_init(struct mutex *lock, const char *name,
  *
  * Returns 1 if the mutex is locked, 0 if unlocked.
  */
-static inline int fastcall mutex_is_locked(struct mutex *lock)
+static inline int mutex_is_locked(struct mutex *lock)
 {
 	return atomic_read(&lock->count) != 1;
 }
@@ -132,9 +132,9 @@ extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
 #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
 #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0)
 #else
-extern void fastcall mutex_lock(struct mutex *lock);
-extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
-extern int __must_check fastcall mutex_lock_killable(struct mutex *lock);
+extern void mutex_lock(struct mutex *lock);
+extern int __must_check mutex_lock_interruptible(struct mutex *lock);
+extern int __must_check mutex_lock_killable(struct mutex *lock);
 
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
@@ -145,7 +145,7 @@ extern int __must_check fastcall mutex_lock_killable(struct mutex *lock);
  * NOTE: mutex_trylock() follows the spin_trylock() convention,
  *       not the down_trylock() convention!
  */
-extern int fastcall mutex_trylock(struct mutex *lock);
-extern void fastcall mutex_unlock(struct mutex *lock);
+extern int mutex_trylock(struct mutex *lock);
+extern void mutex_unlock(struct mutex *lock);
 
 #endif
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 484988ed301e..23f0c54175cd 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -11,8 +11,8 @@
 #include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
-  extern void fastcall add_preempt_count(int val);
-  extern void fastcall sub_preempt_count(int val);
+  extern void add_preempt_count(int val);
+  extern void sub_preempt_count(int val);
 #else
 # define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
 # define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 124449733c55..576a5f77d3bd 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -71,7 +71,7 @@
 #define LOCK_SECTION_END                        \
         ".previous\n\t"
 
-#define __lockfunc fastcall __attribute__((section(".spinlock.text")))
+#define __lockfunc __attribute__((section(".spinlock.text")))
 
 /*
  * Pull the raw_spinlock_t and raw_rwlock_t definitions:
diff --git a/include/linux/timer.h b/include/linux/timer.h
index de0e71359ede..ffe438a901b5 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -35,8 +35,8 @@ extern struct tvec_base boot_tvec_bases;
 	struct timer_list _name =				\
 		TIMER_INITIALIZER(_function, _expires, _data)
 
-void fastcall init_timer(struct timer_list * timer);
-void fastcall init_timer_deferrable(struct timer_list *timer);
+void init_timer(struct timer_list *timer);
+void init_timer_deferrable(struct timer_list *timer);
 
 static inline void setup_timer(struct timer_list * timer,
 				void (*function)(unsigned long),
-- 
cgit 


From 8b5f6883683c91ad7e1af32b7ceeb604d68e2865 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Fri, 8 Feb 2008 04:20:12 -0800
Subject: byteorder: move le32_add_cpu & friends from OCFS2 to core

This patchset moves le*_add_cpu and be*_add_cpu functions from OCFS2 to core
header (1st), converts ext3 filesystem to this API (2nd) and replaces XFS
different named functions with new ones (3rd).

There are many places where these functions will be useful.  Just look at:
grep -r 'cpu_to_[ble12346]*([ble12346]*_to_cpu.*[-+]' linux-src/ Patch for
ext3 is an example how conversions will probably look like.

This patch:

- move inline functions which add native byte order variable to
  little/big endian variable to core header
  * le16_add_cpu(__le16 *var, u16 val)
  * le32_add_cpu(__le32 *var, u32 val)
  * le64_add_cpu(__le64 *var, u64 val)
  * be32_add_cpu(__be32 *var, u32 val)
- add for completeness:
  * be16_add_cpu(__be16 *var, u16 val)
  * be64_add_cpu(__be64 *var, u64 val)

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Acked-by: Mark Fasheh <mark.fasheh@oracle.com>
Cc: David Chinner <dgc@sgi.com>
Cc: Timothy Shimmin <tes@sgi.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/cluster/endian.h         | 30 --------------------------
 fs/ocfs2/cluster/nodemanager.c    |  1 -
 fs/ocfs2/dlm/dlmast.c             |  1 -
 fs/ocfs2/endian.h                 | 45 ---------------------------------------
 fs/ocfs2/ocfs2.h                  |  1 -
 include/linux/byteorder/generic.h | 30 ++++++++++++++++++++++++++
 6 files changed, 30 insertions(+), 78 deletions(-)
 delete mode 100644 fs/ocfs2/cluster/endian.h
 delete mode 100644 fs/ocfs2/endian.h

(limited to 'include/linux')

diff --git a/fs/ocfs2/cluster/endian.h b/fs/ocfs2/cluster/endian.h
deleted file mode 100644
index 2df9082f4e35..000000000000
--- a/fs/ocfs2/cluster/endian.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_CLUSTER_ENDIAN_H
-#define OCFS2_CLUSTER_ENDIAN_H
-
-static inline void be32_add_cpu(__be32 *var, u32 val)
-{
-	*var = cpu_to_be32(be32_to_cpu(*var) + val);
-}
-
-#endif /* OCFS2_CLUSTER_ENDIAN_H */
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index af2070da308b..709fba25bf7e 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -24,7 +24,6 @@
 #include <linux/sysctl.h>
 #include <linux/configfs.h>
 
-#include "endian.h"
 #include "tcp.h"
 #include "nodemanager.h"
 #include "heartbeat.h"
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 2fd8bded38f3..644bee55d8ba 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -43,7 +43,6 @@
 #include "cluster/heartbeat.h"
 #include "cluster/nodemanager.h"
 #include "cluster/tcp.h"
-#include "cluster/endian.h"
 
 #include "dlmapi.h"
 #include "dlmcommon.h"
diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h
deleted file mode 100644
index 1942e09f6ee5..000000000000
--- a/fs/ocfs2/endian.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_ENDIAN_H
-#define OCFS2_ENDIAN_H
-
-static inline void le16_add_cpu(__le16 *var, u16 val)
-{
-	*var = cpu_to_le16(le16_to_cpu(*var) + val);
-}
-
-static inline void le32_add_cpu(__le32 *var, u32 val)
-{
-	*var = cpu_to_le32(le32_to_cpu(*var) + val);
-}
-
-static inline void le64_add_cpu(__le64 *var, u64 val)
-{
-	*var = cpu_to_le64(le64_to_cpu(*var) + val);
-}
-
-static inline void be32_add_cpu(__be32 *var, u32 val)
-{
-	*var = cpu_to_be32(be32_to_cpu(*var) + val);
-}
-
-#endif /* OCFS2_ENDIAN_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index e8b7292e0152..6546cef212e3 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -43,7 +43,6 @@
 #include "dlm/dlmapi.h"
 
 #include "ocfs2_fs.h"
-#include "endian.h"
 #include "ocfs2_lockid.h"
 
 /* Most user visible OCFS2 inodes will have very few pieces of
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 3dc715b02500..d3771551fdd9 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -146,6 +146,36 @@
 #define htons(x) ___htons(x)
 #define ntohs(x) ___ntohs(x)
 
+static inline void le16_add_cpu(__le16 *var, u16 val)
+{
+	*var = cpu_to_le16(le16_to_cpu(*var) + val);
+}
+
+static inline void le32_add_cpu(__le32 *var, u32 val)
+{
+	*var = cpu_to_le32(le32_to_cpu(*var) + val);
+}
+
+static inline void le64_add_cpu(__le64 *var, u64 val)
+{
+	*var = cpu_to_le64(le64_to_cpu(*var) + val);
+}
+
+static inline void be16_add_cpu(__be16 *var, u16 val)
+{
+	*var = cpu_to_be16(be16_to_cpu(*var) + val);
+}
+
+static inline void be32_add_cpu(__be32 *var, u32 val)
+{
+	*var = cpu_to_be32(be32_to_cpu(*var) + val);
+}
+
+static inline void be64_add_cpu(__be64 *var, u64 val)
+{
+	*var = cpu_to_be64(be64_to_cpu(*var) + val);
+}
+
 #endif /* KERNEL */
 
 #endif /* _LINUX_BYTEORDER_GENERIC_H */
-- 
cgit 


From 8b88b0998e35d239e74446cc30f354bdab86df89 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 8 Feb 2008 04:20:26 -0800
Subject: libfs: allow error return from simple attributes

Sometimes simple attributes might need to return an error, e.g. for
acquiring a mutex interruptibly.  In fact we have that situation in
spufs already which is the original user of the simple attributes.  This
patch merged the temporarily forked attributes in spufs back into the
main ones and allows to return errors.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: <stefano.brivio@polimi.it>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg KH <greg@kroah.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/avr32/kernel/ocd.c                  | 18 +++++++++++-------
 arch/powerpc/platforms/cell/spufs/file.c |  8 ++++----
 fs/debugfs/file.c                        | 32 ++++++++++++++++++++------------
 fs/libfs.c                               | 21 +++++++++++++--------
 include/linux/fs.h                       |  2 +-
 lib/fault-inject.c                       | 19 ++++++++++++-------
 virt/kvm/kvm_main.c                      | 16 ++++++++--------
 7 files changed, 69 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/kernel/ocd.c b/arch/avr32/kernel/ocd.c
index c4f023294d75..1b0245d4e0ca 100644
--- a/arch/avr32/kernel/ocd.c
+++ b/arch/avr32/kernel/ocd.c
@@ -90,25 +90,29 @@ static struct dentry *ocd_debugfs_DC;
 static struct dentry *ocd_debugfs_DS;
 static struct dentry *ocd_debugfs_count;
 
-static u64 ocd_DC_get(void *data)
+static int ocd_DC_get(void *data, u64 *val)
 {
-	return ocd_read(DC);
+	*val = ocd_read(DC);
+	return 0;
 }
-static void ocd_DC_set(void *data, u64 val)
+static int ocd_DC_set(void *data, u64 val)
 {
 	ocd_write(DC, val);
+	return 0;
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_DC, ocd_DC_get, ocd_DC_set, "0x%08llx\n");
 
-static u64 ocd_DS_get(void *data)
+static int ocd_DS_get(void *data, u64 *val)
 {
-	return ocd_read(DS);
+	*val = ocd_read(DS);
+	return 0;
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_DS, ocd_DS_get, NULL, "0x%08llx\n");
 
-static u64 ocd_count_get(void *data)
+static int ocd_count_get(void *data, u64 *val)
 {
-	return ocd_count;
+	*val = ocd_count;
+	return 0;
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_count, ocd_count_get, NULL, "%lld\n");
 
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 1018acd1746b..9326714717ca 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -460,7 +460,7 @@ static int spufs_cntl_open(struct inode *inode, struct file *file)
 	if (!i->i_openers++)
 		ctx->cntl = inode->i_mapping;
 	mutex_unlock(&ctx->mapping_lock);
-	return spufs_attr_open(inode, file, spufs_cntl_get,
+	return simple_attr_open(inode, file, spufs_cntl_get,
 					spufs_cntl_set, "0x%08lx");
 }
 
@@ -470,7 +470,7 @@ spufs_cntl_release(struct inode *inode, struct file *file)
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
-	spufs_attr_release(inode, file);
+	simple_attr_close(inode, file);
 
 	mutex_lock(&ctx->mapping_lock);
 	if (!--i->i_openers)
@@ -482,8 +482,8 @@ spufs_cntl_release(struct inode *inode, struct file *file)
 static const struct file_operations spufs_cntl_fops = {
 	.open = spufs_cntl_open,
 	.release = spufs_cntl_release,
-	.read = spufs_attr_read,
-	.write = spufs_attr_write,
+	.read = simple_attr_read,
+	.write = simple_attr_write,
 	.mmap = spufs_cntl_mmap,
 };
 
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index fa6b7f7ff914..fddffe4851f5 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -56,13 +56,15 @@ const struct inode_operations debugfs_link_operations = {
 	.follow_link    = debugfs_follow_link,
 };
 
-static void debugfs_u8_set(void *data, u64 val)
+static int debugfs_u8_set(void *data, u64 val)
 {
 	*(u8 *)data = val;
+	return 0;
 }
-static u64 debugfs_u8_get(void *data)
+static int debugfs_u8_get(void *data, u64 *val)
 {
-	return *(u8 *)data;
+	*val = *(u8 *)data;
+	return 0;
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
 
@@ -97,13 +99,15 @@ struct dentry *debugfs_create_u8(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_u8);
 
-static void debugfs_u16_set(void *data, u64 val)
+static int debugfs_u16_set(void *data, u64 val)
 {
 	*(u16 *)data = val;
+	return 0;
 }
-static u64 debugfs_u16_get(void *data)
+static int debugfs_u16_get(void *data, u64 *val)
 {
-	return *(u16 *)data;
+	*val = *(u16 *)data;
+	return 0;
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
 
@@ -138,13 +142,15 @@ struct dentry *debugfs_create_u16(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_u16);
 
-static void debugfs_u32_set(void *data, u64 val)
+static int debugfs_u32_set(void *data, u64 val)
 {
 	*(u32 *)data = val;
+	return 0;
 }
-static u64 debugfs_u32_get(void *data)
+static int debugfs_u32_get(void *data, u64 *val)
 {
-	return *(u32 *)data;
+	*val = *(u32 *)data;
+	return 0;
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
 
@@ -179,14 +185,16 @@ struct dentry *debugfs_create_u32(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_u32);
 
-static void debugfs_u64_set(void *data, u64 val)
+static int debugfs_u64_set(void *data, u64 val)
 {
 	*(u64 *)data = val;
+	return 0;
 }
 
-static u64 debugfs_u64_get(void *data)
+static int debugfs_u64_get(void *data, u64 *val)
 {
-	return *(u64 *)data;
+	*val = *(u64 *)data;
+	return 0;
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 5523bde96387..2319415ddb5e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -583,8 +583,8 @@ int simple_transaction_release(struct inode *inode, struct file *file)
 /* Simple attribute files */
 
 struct simple_attr {
-	u64 (*get)(void *);
-	void (*set)(void *, u64);
+	int (*get)(void *, u64 *);
+	int (*set)(void *, u64);
 	char get_buf[24];	/* enough to store a u64 and "\n\0" */
 	char set_buf[24];
 	void *data;
@@ -595,7 +595,7 @@ struct simple_attr {
 /* simple_attr_open is called by an actual attribute open file operation
  * to set the attribute specific access operations. */
 int simple_attr_open(struct inode *inode, struct file *file,
-		     u64 (*get)(void *), void (*set)(void *, u64),
+		     int (*get)(void *, u64 *), int (*set)(void *, u64),
 		     const char *fmt)
 {
 	struct simple_attr *attr;
@@ -635,14 +635,20 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
 		return -EACCES;
 
 	mutex_lock(&attr->mutex);
-	if (*ppos) /* continued read */
+	if (*ppos) {		/* continued read */
 		size = strlen(attr->get_buf);
-	else	  /* first read */
+	} else {		/* first read */
+		u64 val;
+		ret = attr->get(attr->data, &val);
+		if (ret)
+			goto out;
+
 		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
-				 attr->fmt,
-				 (unsigned long long)attr->get(attr->data));
+				 attr->fmt, (unsigned long long)val);
+	}
 
 	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
+out:
 	mutex_unlock(&attr->mutex);
 	return ret;
 }
@@ -657,7 +663,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 	ssize_t ret;
 
 	attr = file->private_data;
-
 	if (!attr->set)
 		return -EACCES;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 36b7abefacbe..ebe996ac2589 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2068,7 +2068,7 @@ __simple_attr_check_format(const char *fmt, ...)
 }
 
 int simple_attr_open(struct inode *inode, struct file *file,
-		     u64 (*get)(void *), void (*set)(void *, u64),
+		     int (*get)(void *, u64 *), int (*set)(void *, u64),
 		     const char *fmt);
 int simple_attr_close(struct inode *inode, struct file *file);
 ssize_t simple_attr_read(struct file *file, char __user *buf,
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 23985a278bbb..a50a311554cc 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -134,23 +134,26 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
 
-static void debugfs_ul_set(void *data, u64 val)
+static int debugfs_ul_set(void *data, u64 val)
 {
 	*(unsigned long *)data = val;
+	return 0;
 }
 
 #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
-static void debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val)
+static int debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val)
 {
 	*(unsigned long *)data =
 		val < MAX_STACK_TRACE_DEPTH ?
 		val : MAX_STACK_TRACE_DEPTH;
+	return 0;
 }
 #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
 
-static u64 debugfs_ul_get(void *data)
+static int debugfs_ul_get(void *data, u64 *val)
 {
-	return *(unsigned long *)data;
+	*val = *(unsigned long *)data;
+	return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n");
@@ -174,14 +177,16 @@ static struct dentry *debugfs_create_ul_MAX_STACK_TRACE_DEPTH(
 }
 #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
 
-static void debugfs_atomic_t_set(void *data, u64 val)
+static int debugfs_atomic_t_set(void *data, u64 val)
 {
 	atomic_set((atomic_t *)data, val);
+	return 0;
 }
 
-static u64 debugfs_atomic_t_get(void *data)
+static int debugfs_atomic_t_get(void *data, u64 *val)
 {
-	return atomic_read((atomic_t *)data);
+	*val = atomic_read((atomic_t *)data);
+	return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3c4fe26096fc..32fbf8006969 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1186,38 +1186,38 @@ static struct notifier_block kvm_cpu_notifier = {
 	.priority = 20, /* must be > scheduler priority */
 };
 
-static u64 vm_stat_get(void *_offset)
+static int vm_stat_get(void *_offset, u64 *val)
 {
 	unsigned offset = (long)_offset;
-	u64 total = 0;
 	struct kvm *kvm;
 
+	*val = 0;
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
-		total += *(u32 *)((void *)kvm + offset);
+		*val += *(u32 *)((void *)kvm + offset);
 	spin_unlock(&kvm_lock);
-	return total;
+	return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
 
-static u64 vcpu_stat_get(void *_offset)
+static int vcpu_stat_get(void *_offset, u64 *val)
 {
 	unsigned offset = (long)_offset;
-	u64 total = 0;
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 	int i;
 
+	*val = 0;
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 			vcpu = kvm->vcpus[i];
 			if (vcpu)
-				total += *(u32 *)((void *)vcpu + offset);
+				*val += *(u32 *)((void *)vcpu + offset);
 		}
 	spin_unlock(&kvm_lock);
-	return total;
+	return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
-- 
cgit 


From 74bedc4d56211b30686c6f2f574bf6c6a9654887 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 8 Feb 2008 04:20:28 -0800
Subject: libfs: rename simple_attr_close to simple_attr_release

simple_attr_close implementes ->release so it should be named accordingly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: <stefano.brivio@polimi.it>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg KH <greg@kroah.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/spufs/file.c | 2 +-
 fs/libfs.c                               | 4 ++--
 include/linux/fs.h                       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 9326714717ca..e393144d533d 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -470,7 +470,7 @@ spufs_cntl_release(struct inode *inode, struct file *file)
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
-	simple_attr_close(inode, file);
+	simple_attr_release(inode, file);
 
 	mutex_lock(&ctx->mapping_lock);
 	if (!--i->i_openers)
diff --git a/fs/libfs.c b/fs/libfs.c
index d6de56a6e183..b004dfadd891 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -615,7 +615,7 @@ int simple_attr_open(struct inode *inode, struct file *file,
 	return nonseekable_open(inode, file);
 }
 
-int simple_attr_close(struct inode *inode, struct file *file)
+int simple_attr_release(struct inode *inode, struct file *file)
 {
 	kfree(file->private_data);
 	return 0;
@@ -804,6 +804,6 @@ EXPORT_SYMBOL(simple_transaction_get);
 EXPORT_SYMBOL(simple_transaction_read);
 EXPORT_SYMBOL(simple_transaction_release);
 EXPORT_SYMBOL_GPL(simple_attr_open);
-EXPORT_SYMBOL_GPL(simple_attr_close);
+EXPORT_SYMBOL_GPL(simple_attr_release);
 EXPORT_SYMBOL_GPL(simple_attr_read);
 EXPORT_SYMBOL_GPL(simple_attr_write);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ebe996ac2589..1137a8828089 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2056,7 +2056,7 @@ static int __fops ## _open(struct inode *inode, struct file *file)	\
 static struct file_operations __fops = {				\
 	.owner	 = THIS_MODULE,						\
 	.open	 = __fops ## _open,					\
-	.release = simple_attr_close,					\
+	.release = simple_attr_release,					\
 	.read	 = simple_attr_read,					\
 	.write	 = simple_attr_write,					\
 };
@@ -2070,7 +2070,7 @@ __simple_attr_check_format(const char *fmt, ...)
 int simple_attr_open(struct inode *inode, struct file *file,
 		     int (*get)(void *, u64 *), int (*set)(void *, u64),
 		     const char *fmt);
-int simple_attr_close(struct inode *inode, struct file *file);
+int simple_attr_release(struct inode *inode, struct file *file);
 ssize_t simple_attr_read(struct file *file, char __user *buf,
 			 size_t len, loff_t *ppos);
 ssize_t simple_attr_write(struct file *file, const char __user *buf,
-- 
cgit 


From 6c79e987d629cb0f8f7e2983725f4434a2dec66b Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Fri, 8 Feb 2008 04:20:30 -0800
Subject: udf: remove some ugly macros

remove macros:
- UDF_SB_PARTMAPS
- UDF_SB_PARTTYPE
- UDF_SB_PARTROOT
- UDF_SB_PARTLEN
- UDF_SB_PARTVSN
- UDF_SB_PARTNUM
- UDF_SB_TYPESPAR
- UDF_SB_TYPEVIRT
- UDF_SB_PARTFUNC
- UDF_SB_PARTFLAGS
- UDF_SB_VOLIDENT
- UDF_SB_NUMPARTS
- UDF_SB_PARTITION
- UDF_SB_SESSION
- UDF_SB_ANCHOR
- UDF_SB_LASTBLOCK
- UDF_SB_LVIDBH
- UDF_SB_LVID
- UDF_SB_UMASK
- UDF_SB_GID
- UDF_SB_UID
- UDF_SB_RECORDTIME
- UDF_SB_SERIALNUM
- UDF_SB_UDFREV
- UDF_SB_FLAGS
- UDF_SB_VAT
- UDF_UPDATE_UDFREV
- UDF_SB_FREE
and open code them

convert UDF_SB_LVIDIU macro to udf_sb_lvidiu function

rename some struct udf_sb_info fields:
- s_volident to s_volume_ident
- s_lastblock to s_last_block
- s_lvidbh to s_lvid_bh
- s_recordtime to s_record_time
- s_serialnum to s_serial_number;
- s_vat to s_vat_inode;

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Ben Fennema <bfennema@falcon.csc.calpoly.edu>
Cc: Jan Kara <jack@suse.cz>
Acked-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/udf/balloc.c           | 136 ++++++------
 fs/udf/file.c             |   2 +-
 fs/udf/ialloc.c           |  33 +--
 fs/udf/inode.c            |  34 +--
 fs/udf/misc.c             |  15 +-
 fs/udf/namei.c            |  24 +-
 fs/udf/partition.c        |  67 +++---
 fs/udf/super.c            | 546 ++++++++++++++++++++++++++--------------------
 fs/udf/truncate.c         |   7 +-
 fs/udf/udf_sb.h           |  75 ++-----
 include/linux/udf_fs_sb.h |  12 +-
 11 files changed, 509 insertions(+), 442 deletions(-)

(limited to 'include/linux')

diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index ab26176f6b91..8c0c27912278 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -88,7 +88,7 @@ static int read_block_bitmap(struct super_block *sb,
 	kernel_lb_addr loc;
 
 	loc.logicalBlockNum = bitmap->s_extPosition;
-	loc.partitionReferenceNum = UDF_SB_PARTITION(sb);
+	loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
 
 	bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block));
 	if (!bh) {
@@ -155,10 +155,10 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	if (bloc.logicalBlockNum < 0 ||
-	    (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) {
+	    (bloc.logicalBlockNum + count) > sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
 			  bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count,
-			  UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum));
+			  sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len);
 		goto error_return;
 	}
 
@@ -188,9 +188,10 @@ do_more:
 		} else {
 			if (inode)
 				DQUOT_FREE_BLOCK(inode, 1);
-			if (UDF_SB_LVIDBH(sb)) {
-				UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)] =
-					cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]) + 1);
+			if (sbi->s_lvid_bh) {
+				struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+				lvid->freeSpaceTable[sbi->s_partition] =
+					cpu_to_le32(le32_to_cpu(lvid->freeSpaceTable[sbi->s_partition]) + 1);
 			}
 		}
 	}
@@ -202,8 +203,8 @@ do_more:
 	}
 error_return:
 	sb->s_dirt = 1;
-	if (UDF_SB_LVIDBH(sb))
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+	if (sbi->s_lvid_bh)
+		mark_buffer_dirty(sbi->s_lvid_bh);
 	mutex_unlock(&sbi->s_alloc_mutex);
 	return;
 }
@@ -219,16 +220,18 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
 	int bit, block, block_group, group_start;
 	int nr_groups, bitmap_nr;
 	struct buffer_head *bh;
+	__u32 part_len;
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition))
+	part_len = sbi->s_partmaps[partition].s_partition_len;
+	if (first_block < 0 || first_block >= part_len)
 		goto out;
 
-	if (first_block + block_count > UDF_SB_PARTLEN(sb, partition))
-		block_count = UDF_SB_PARTLEN(sb, partition) - first_block;
+	if (first_block + block_count > part_len)
+		block_count = part_len - first_block;
 
 repeat:
-	nr_groups = (UDF_SB_PARTLEN(sb, partition) +
+	nr_groups = (sbi->s_partmaps[partition].s_partition_len +
 		     (sizeof(struct spaceBitmapDesc) << 3) +
 		     (sb->s_blocksize * 8) - 1) / (sb->s_blocksize * 8);
 	block = first_block + (sizeof(struct spaceBitmapDesc) << 3);
@@ -261,10 +264,11 @@ repeat:
 	if (block_count > 0)
 		goto repeat;
 out:
-	if (UDF_SB_LVIDBH(sb)) {
-		UDF_SB_LVID(sb)->freeSpaceTable[partition] =
-			cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - alloc_count);
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+		lvid->freeSpaceTable[partition] =
+			cpu_to_le32(le32_to_cpu(lvid->freeSpaceTable[partition]) - alloc_count);
+		mark_buffer_dirty(sbi->s_lvid_bh);
 	}
 	sb->s_dirt = 1;
 	mutex_unlock(&sbi->s_alloc_mutex);
@@ -287,7 +291,7 @@ static int udf_bitmap_new_block(struct super_block *sb,
 	mutex_lock(&sbi->s_alloc_mutex);
 
 repeat:
-	if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition))
+	if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
 		goal = 0;
 
 	nr_groups = bitmap->s_nr_groups;
@@ -389,10 +393,11 @@ got_block:
 
 	mark_buffer_dirty(bh);
 
-	if (UDF_SB_LVIDBH(sb)) {
-		UDF_SB_LVID(sb)->freeSpaceTable[partition] =
-			cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - 1);
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+		lvid->freeSpaceTable[partition] =
+			cpu_to_le32(le32_to_cpu(lvid->freeSpaceTable[partition]) - 1);
+		mark_buffer_dirty(sbi->s_lvid_bh);
 	}
 	sb->s_dirt = 1;
 	mutex_unlock(&sbi->s_alloc_mutex);
@@ -421,10 +426,10 @@ static void udf_table_free_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	if (bloc.logicalBlockNum < 0 ||
-	    (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) {
+	    (bloc.logicalBlockNum + count) > sbi->s_partmaps[bloc.partitionReferenceNum].s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
 			  bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count,
-			  UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum));
+			  sbi->s_partmaps[bloc.partitionReferenceNum]->s_partition_len);
 		goto error_return;
 	}
 
@@ -432,10 +437,11 @@ static void udf_table_free_blocks(struct super_block *sb,
 	   but.. oh well */
 	if (inode)
 		DQUOT_FREE_BLOCK(inode, count);
-	if (UDF_SB_LVIDBH(sb)) {
-		UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)] =
-			cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]) + count);
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+		lvid->freeSpaceTable[sbi->s_partition] =
+			cpu_to_le32(le32_to_cpu(lvid->freeSpaceTable[sbi->s_partition]) + count);
+		mark_buffer_dirty(sbi->s_lvid_bh);
 	}
 
 	start = bloc.logicalBlockNum + offset;
@@ -559,7 +565,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 				}
 				epos.offset = sizeof(struct allocExtDesc);
 			}
-			if (UDF_SB_UDFREV(sb) >= 0x0200)
+			if (sbi->s_udfrev >= 0x0200)
 				udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, 1,
 					    epos.block.logicalBlockNum, sizeof(tag));
 			else
@@ -627,7 +633,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 	struct extent_position epos;
 	int8_t etype = -1;
 
-	if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition))
+	if (first_block < 0 || first_block >= sbi->s_partmaps[partition].s_partition_len)
 		return 0;
 
 	if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_SHORT)
@@ -670,10 +676,11 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 
 	brelse(epos.bh);
 
-	if (alloc_count && UDF_SB_LVIDBH(sb)) {
-		UDF_SB_LVID(sb)->freeSpaceTable[partition] =
-			cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - alloc_count);
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+	if (alloc_count && sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+		lvid->freeSpaceTable[partition] =
+			cpu_to_le32(le32_to_cpu(lvid->freeSpaceTable[partition]) - alloc_count);
+		mark_buffer_dirty(sbi->s_lvid_bh);
 		sb->s_dirt = 1;
 	}
 	mutex_unlock(&sbi->s_alloc_mutex);
@@ -703,7 +710,7 @@ static int udf_table_new_block(struct super_block *sb,
 		return newblock;
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition))
+	if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
 		goal = 0;
 
 	/* We search for the closest matching block to goal. If we find a exact hit,
@@ -771,10 +778,11 @@ static int udf_table_new_block(struct super_block *sb,
 		udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
 	brelse(goal_epos.bh);
 
-	if (UDF_SB_LVIDBH(sb)) {
-		UDF_SB_LVID(sb)->freeSpaceTable[partition] =
-			cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition]) - 1);
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+		lvid->freeSpaceTable[partition] =
+			cpu_to_le32(le32_to_cpu(lvid->freeSpaceTable[partition]) - 1);
+		mark_buffer_dirty(sbi->s_lvid_bh);
 	}
 
 	sb->s_dirt = 1;
@@ -789,22 +797,23 @@ inline void udf_free_blocks(struct super_block *sb,
 			    uint32_t count)
 {
 	uint16_t partition = bloc.partitionReferenceNum;
+	struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
 
-	if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) {
+	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
 		return udf_bitmap_free_blocks(sb, inode,
-					      UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap,
+					      map->s_uspace.s_bitmap,
 					      bloc, offset, count);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
 		return udf_table_free_blocks(sb, inode,
-					     UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table,
+					     map->s_uspace.s_table,
 					     bloc, offset, count);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
 		return udf_bitmap_free_blocks(sb, inode,
-					      UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap,
+					      map->s_fspace.s_bitmap,
 					      bloc, offset, count);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
 		return udf_table_free_blocks(sb, inode,
-					     UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table,
+					     map->s_fspace.s_table,
 					     bloc, offset, count);
 	} else {
 		return;
@@ -816,21 +825,23 @@ inline int udf_prealloc_blocks(struct super_block *sb,
 			       uint16_t partition, uint32_t first_block,
 			       uint32_t block_count)
 {
-	if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) {
+	struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
+
+	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
 		return udf_bitmap_prealloc_blocks(sb, inode,
-						  UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap,
+						  map->s_uspace.s_bitmap,
 						  partition, first_block, block_count);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
 		return udf_table_prealloc_blocks(sb, inode,
-						 UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table,
+						 map->s_uspace.s_table,
 						 partition, first_block, block_count);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
 		return udf_bitmap_prealloc_blocks(sb, inode,
-						  UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap,
+						  map->s_fspace.s_bitmap,
 						  partition, first_block, block_count);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
 		return udf_table_prealloc_blocks(sb, inode,
-						 UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table,
+						 map->s_fspace.s_table,
 						 partition, first_block, block_count);
 	} else {
 		return 0;
@@ -842,23 +853,24 @@ inline int udf_new_block(struct super_block *sb,
 			 uint16_t partition, uint32_t goal, int *err)
 {
 	int ret;
+	struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
 
-	if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) {
+	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
 		ret = udf_bitmap_new_block(sb, inode,
-					   UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap,
+					   map->s_uspace.s_bitmap,
 					   partition, goal, err);
 		return ret;
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
 		return udf_table_new_block(sb, inode,
-					   UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table,
+					   map->s_uspace.s_table,
 					   partition, goal, err);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
 		return udf_bitmap_new_block(sb, inode,
-					    UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap,
+					    map->s_fspace.s_bitmap,
 					    partition, goal, err);
-	} else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) {
+	} else if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
 		return udf_table_new_block(sb, inode,
-					   UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table,
+					   map->s_fspace.s_table,
 					   partition, goal, err);
 	} else {
 		*err = -EIO;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7c7a1b39d56c..3bd5068877fa 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -192,7 +192,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case UDF_GETVOLIDENT:
 		return copy_to_user((char __user *)arg,
-				    UDF_SB_VOLIDENT(inode->i_sb), 32) ? -EFAULT : 0;
+				    UDF_SB(inode->i_sb)->s_volume_ident, 32) ? -EFAULT : 0;
 	case UDF_RELOCATE_BLOCKS:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 636d8f613929..8145e943be61 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -43,15 +43,17 @@ void udf_free_inode(struct inode *inode)
 	clear_inode(inode);
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (sbi->s_lvidbh) {
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDescImpUse *lvidiu =
+							udf_sb_lvidiu(sbi);
 		if (S_ISDIR(inode->i_mode))
-			UDF_SB_LVIDIU(sb)->numDirs =
-				cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) - 1);
+			lvidiu->numDirs =
+				cpu_to_le32(le32_to_cpu(lvidiu->numDirs) - 1);
 		else
-			UDF_SB_LVIDIU(sb)->numFiles =
-				cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1);
+			lvidiu->numFiles =
+				cpu_to_le32(le32_to_cpu(lvidiu->numFiles) - 1);
 
-		mark_buffer_dirty(sbi->s_lvidbh);
+		mark_buffer_dirty(sbi->s_lvid_bh);
 	}
 	mutex_unlock(&sbi->s_alloc_mutex);
 
@@ -88,21 +90,23 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 	}
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (UDF_SB_LVIDBH(sb)) {
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+		struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sbi);
 		struct logicalVolHeaderDesc *lvhd;
 		uint64_t uniqueID;
-		lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(sb)->logicalVolContentsUse);
+		lvhd = (struct logicalVolHeaderDesc *)(lvid->logicalVolContentsUse);
 		if (S_ISDIR(mode))
-			UDF_SB_LVIDIU(sb)->numDirs =
-				cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) + 1);
+			lvidiu->numDirs =
+				cpu_to_le32(le32_to_cpu(lvidiu->numDirs) + 1);
 		else
-			UDF_SB_LVIDIU(sb)->numFiles =
-				cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + 1);
+			lvidiu->numFiles =
+				cpu_to_le32(le32_to_cpu(lvidiu->numFiles) + 1);
 		UDF_I_UNIQUE(inode) = uniqueID = le64_to_cpu(lvhd->uniqueID);
 		if (!(++uniqueID & 0x00000000FFFFFFFFUL))
 			uniqueID += 16;
 		lvhd->uniqueID = cpu_to_le64(uniqueID);
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+		mark_buffer_dirty(sbi->s_lvid_bh);
 	}
 	inode->i_mode = mode;
 	inode->i_uid = current->fsuid;
@@ -123,7 +127,8 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 	UDF_I_USE(inode) = 0;
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) {
 		UDF_I_EFE(inode) = 1;
-		UDF_UPDATE_UDFREV(inode->i_sb, UDF_VERS_USE_EXTENDED_FE);
+		if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev)
+			sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE;
 		UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL);
 	} else {
 		UDF_I_EFE(inode) = 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 6ff8151984cf..2eb1220e4236 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1081,6 +1081,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 	time_t convtime;
 	long convtime_usec;
 	int offset;
+	struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
 
 	fe = (struct fileEntry *)bh->b_data;
 	efe = (struct extendedFileEntry *)bh->b_data;
@@ -1160,7 +1161,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 			inode->i_atime.tv_sec = convtime;
 			inode->i_atime.tv_nsec = convtime_usec * 1000;
 		} else {
-			inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb);
+			inode->i_atime = sbi->s_record_time;
 		}
 
 		if (udf_stamp_to_time(&convtime, &convtime_usec,
@@ -1168,7 +1169,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 			inode->i_mtime.tv_sec = convtime;
 			inode->i_mtime.tv_nsec = convtime_usec * 1000;
 		} else {
-			inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb);
+			inode->i_mtime = sbi->s_record_time;
 		}
 
 		if (udf_stamp_to_time(&convtime, &convtime_usec,
@@ -1176,7 +1177,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 			inode->i_ctime.tv_sec = convtime;
 			inode->i_ctime.tv_nsec = convtime_usec * 1000;
 		} else {
-			inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb);
+			inode->i_ctime = sbi->s_record_time;
 		}
 
 		UDF_I_UNIQUE(inode) = le64_to_cpu(fe->uniqueID);
@@ -1192,7 +1193,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 			inode->i_atime.tv_sec = convtime;
 			inode->i_atime.tv_nsec = convtime_usec * 1000;
 		} else {
-			inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb);
+			inode->i_atime = sbi->s_record_time;
 		}
 
 		if (udf_stamp_to_time(&convtime, &convtime_usec,
@@ -1200,7 +1201,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 			inode->i_mtime.tv_sec = convtime;
 			inode->i_mtime.tv_nsec = convtime_usec * 1000;
 		} else {
-			inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb);
+			inode->i_mtime = sbi->s_record_time;
 		}
 
 		if (udf_stamp_to_time(&convtime, &convtime_usec,
@@ -1208,7 +1209,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 			UDF_I_CRTIME(inode).tv_sec = convtime;
 			UDF_I_CRTIME(inode).tv_nsec = convtime_usec * 1000;
 		} else {
-			UDF_I_CRTIME(inode) = UDF_SB_RECORDTIME(inode->i_sb);
+			UDF_I_CRTIME(inode) = sbi->s_record_time;
 		}
 
 		if (udf_stamp_to_time(&convtime, &convtime_usec,
@@ -1216,7 +1217,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 			inode->i_ctime.tv_sec = convtime;
 			inode->i_ctime.tv_nsec = convtime_usec * 1000;
 		} else {
-			inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb);
+			inode->i_ctime = sbi->s_record_time;
 		}
 
 		UDF_I_UNIQUE(inode) = le64_to_cpu(efe->uniqueID);
@@ -1353,6 +1354,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 	int i;
 	kernel_timestamp cpu_time;
 	int err = 0;
+	struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
 
 	bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, UDF_I_LOCATION(inode), 0));
 	if (!bh) {
@@ -1537,11 +1539,11 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 				ICBTAG_FLAG_SETGID | ICBTAG_FLAG_STICKY));
 
 	fe->icbTag.flags = cpu_to_le16(icbflags);
-	if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200)
+	if (sbi->s_udfrev >= 0x0200)
 		fe->descTag.descVersion = cpu_to_le16(3);
 	else
 		fe->descTag.descVersion = cpu_to_le16(2);
-	fe->descTag.tagSerialNum = cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb));
+	fe->descTag.tagSerialNum = cpu_to_le16(sbi->s_serial_number);
 	fe->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum);
 	crclen += UDF_I_LENEATTR(inode) + UDF_I_LENALLOC(inode) - sizeof(tag);
 	fe->descTag.descCRCLength = cpu_to_le16(crclen);
@@ -1585,7 +1587,7 @@ struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino)
 	if (is_bad_inode(inode))
 		goto out_iput;
 
-	if (ino.logicalBlockNum >= UDF_SB_PARTLEN(sb, ino.partitionReferenceNum)) {
+	if (ino.logicalBlockNum >= UDF_SB(sb)->s_partmaps[ino.partitionReferenceNum].s_partition_len) {
 		udf_debug("block=%d, partition=%d out of range\n",
 			  ino.logicalBlockNum, ino.partitionReferenceNum);
 		make_bad_inode(inode);
@@ -1667,7 +1669,7 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos,
 				mark_inode_dirty(inode);
 			}
 		}
-		if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200)
+		if (UDF_SB(inode->i_sb)->s_udfrev >= 0x0200)
 			udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1,
 				    epos->block.logicalBlockNum, sizeof(tag));
 		else
@@ -1690,7 +1692,7 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos,
 		}
 		if (epos->bh) {
 			if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
-			    UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
+			    UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 				udf_update_tag(epos->bh->b_data, loffset);
 			else
 				udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc));
@@ -1711,7 +1713,7 @@ int8_t udf_add_aext(struct inode * inode, struct extent_position * epos,
 		aed = (struct allocExtDesc *)epos->bh->b_data;
 		aed->lengthAllocDescs =
 			cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize);
-		if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
+		if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 			udf_update_tag(epos->bh->b_data, epos->offset + (inc ? 0 : adsize));
 		else
 			udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc));
@@ -1754,7 +1756,7 @@ int8_t udf_write_aext(struct inode * inode, struct extent_position * epos,
 
 	if (epos->bh) {
 		if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
-		    UDF_SB_UDFREV(inode->i_sb) >= 0x0201) {
+		    UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) {
 			struct allocExtDesc *aed = (struct allocExtDesc *)epos->bh->b_data;
 			udf_update_tag(epos->bh->b_data,
 				       le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct allocExtDesc));
@@ -1907,7 +1909,7 @@ int8_t udf_delete_aext(struct inode * inode, struct extent_position epos,
 			aed->lengthAllocDescs =
 				cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - (2 * adsize));
 			if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
-			    UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
+			    UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 				udf_update_tag(oepos.bh->b_data, oepos.offset - (2 * adsize));
 			else
 				udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc));
@@ -1923,7 +1925,7 @@ int8_t udf_delete_aext(struct inode * inode, struct extent_position epos,
 			aed->lengthAllocDescs =
 				cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - adsize);
 			if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
-			    UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
+			    UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 				udf_update_tag(oepos.bh->b_data, epos.offset - adsize);
 			else
 				udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc));
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 15297deb5051..7cecb3098061 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -81,14 +81,16 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
 				return NULL;
 			}
 		} else {
+			struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
+
 			size -= sizeof(struct extendedAttrHeaderDesc);
 			UDF_I_LENEATTR(inode) += sizeof(struct extendedAttrHeaderDesc);
 			eahd->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EAHD);
-			if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200)
+			if (sbi->s_udfrev >= 0x0200)
 				eahd->descTag.descVersion = cpu_to_le16(3);
 			else
 				eahd->descTag.descVersion = cpu_to_le16(2);
-			eahd->descTag.tagSerialNum = cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb));
+			eahd->descTag.tagSerialNum = cpu_to_le16(sbi->s_serial_number);
 			eahd->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum);
 			eahd->impAttrLocation = cpu_to_le32(0xFFFFFFFF);
 			eahd->appAttrLocation = cpu_to_le32(0xFFFFFFFF);
@@ -192,15 +194,16 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 	struct buffer_head *bh = NULL;
 	register uint8_t checksum;
 	register int i;
+	struct udf_sb_info *sbi = UDF_SB(sb);
 
 	/* Read the block */
 	if (block == 0xFFFFFFFF)
 		return NULL;
 
-	bh = udf_tread(sb, block + UDF_SB_SESSION(sb));
+	bh = udf_tread(sb, block + sbi->s_session);
 	if (!bh) {
 		udf_debug("block=%d, location=%d: read failed\n",
-			  block + UDF_SB_SESSION(sb), location);
+			  block + sbi->s_session, location);
 		return NULL;
 	}
 
@@ -210,7 +213,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 
 	if (location != le32_to_cpu(tag_p->tagLocation)) {
 		udf_debug("location mismatch block %u, tag %u != %u\n",
-			  block + UDF_SB_SESSION(sb), le32_to_cpu(tag_p->tagLocation), location);
+			  block + sbi->s_session, le32_to_cpu(tag_p->tagLocation), location);
 		goto error_out;
 	}
 
@@ -240,7 +243,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 		return bh;
 	}
 	udf_debug("Crc failure block %d: crc = %d, crclen = %d\n",
-		  block + UDF_SB_SESSION(sb), le16_to_cpu(tag_p->descCRC),
+		  block + sbi->s_session, le16_to_cpu(tag_p->descCRC),
 		  le16_to_cpu(tag_p->descCRCLength));
 
 error_out:
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index bec96a6b3343..86033d92824c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -325,7 +325,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 					   struct udf_fileident_bh *fibh,
 					   struct fileIdentDesc *cfi, int *err)
 {
-	struct super_block *sb;
+	struct super_block *sb = dir->i_sb;
 	struct fileIdentDesc *fi = NULL;
 	char name[UDF_NAME_LEN], fname[UDF_NAME_LEN];
 	int namelen;
@@ -342,8 +342,6 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 	sector_t offset;
 	struct extent_position epos = {};
 
-	sb = dir->i_sb;
-
 	if (dentry) {
 		if (!dentry->d_name.len) {
 			*err = -EINVAL;
@@ -535,7 +533,7 @@ add:
 	}
 
 	memset(cfi, 0, sizeof(struct fileIdentDesc));
-	if (UDF_SB_UDFREV(sb) >= 0x0200)
+	if (UDF_SB(sb)->s_udfrev >= 0x0200)
 		udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, sizeof(tag));
 	else
 		udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, sizeof(tag));
@@ -901,6 +899,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	int block;
 	char name[UDF_NAME_LEN];
 	int namelen;
+	struct buffer_head *bh;
 
 	lock_kernel();
 	if (!(inode = udf_new_inode(dir, S_IFLNK, &err)))
@@ -1014,17 +1013,19 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 		goto out_no_entry;
 	cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
 	cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode));
-	if (UDF_SB_LVIDBH(inode->i_sb)) {
+	bh = UDF_SB(inode->i_sb)->s_lvid_bh;
+	if (bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
 		struct logicalVolHeaderDesc *lvhd;
 		uint64_t uniqueID;
-		lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)->logicalVolContentsUse);
+		lvhd = (struct logicalVolHeaderDesc *)(lvid->logicalVolContentsUse);
 		uniqueID = le64_to_cpu(lvhd->uniqueID);
 		*(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
 			cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL);
 		if (!(++uniqueID & 0x00000000FFFFFFFFUL))
 			uniqueID += 16;
 		lvhd->uniqueID = cpu_to_le64(uniqueID);
-		mark_buffer_dirty(UDF_SB_LVIDBH(inode->i_sb));
+		mark_buffer_dirty(bh);
 	}
 	udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
 	if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) {
@@ -1053,6 +1054,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 	struct udf_fileident_bh fibh;
 	struct fileIdentDesc cfi, *fi;
 	int err;
+	struct buffer_head *bh;
 
 	lock_kernel();
 	if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
@@ -1066,17 +1068,19 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 	}
 	cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
 	cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode));
-	if (UDF_SB_LVIDBH(inode->i_sb)) {
+	bh = UDF_SB(inode->i_sb)->s_lvid_bh;
+	if (bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
 		struct logicalVolHeaderDesc *lvhd;
 		uint64_t uniqueID;
-		lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)->logicalVolContentsUse);
+		lvhd = (struct logicalVolHeaderDesc *)(lvid->logicalVolContentsUse);
 		uniqueID = le64_to_cpu(lvhd->uniqueID);
 		*(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
 			cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL);
 		if (!(++uniqueID & 0x00000000FFFFFFFFUL))
 			uniqueID += 16;
 		lvhd->uniqueID = cpu_to_le64(uniqueID);
-		mark_buffer_dirty(UDF_SB_LVIDBH(inode->i_sb));
+		mark_buffer_dirty(bh);
 	}
 	udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
 	if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) {
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index aaab24c8c498..eeb4714b3641 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -31,15 +31,18 @@
 inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
 			       uint16_t partition, uint32_t offset)
 {
-	if (partition >= UDF_SB_NUMPARTS(sb)) {
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct udf_part_map *map;
+	if (partition >= sbi->s_partitions) {
 		udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n",
 			  block, partition, offset);
 		return 0xFFFFFFFF;
 	}
-	if (UDF_SB_PARTFUNC(sb, partition))
-		return UDF_SB_PARTFUNC(sb, partition)(sb, block, partition, offset);
+	map = &sbi->s_partmaps[partition];
+	if (map->s_partition_func)
+		return map->s_partition_func(sb, block, partition, offset);
 	else
-		return UDF_SB_PARTROOT(sb, partition) + block + offset;
+		return map->s_partition_root + block + offset;
 }
 
 uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
@@ -49,12 +52,15 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
 	uint32_t newblock;
 	uint32_t index;
 	uint32_t loc;
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct udf_part_map *map;
 
-	index = (sb->s_blocksize - UDF_SB_TYPEVIRT(sb,partition).s_start_offset) / sizeof(uint32_t);
+	map = &sbi->s_partmaps[partition];
+	index = (sb->s_blocksize - map->s_type_specific.s_virtual.s_start_offset) / sizeof(uint32_t);
 
-	if (block > UDF_SB_TYPEVIRT(sb,partition).s_num_entries) {
+	if (block > map->s_type_specific.s_virtual.s_num_entries) {
 		udf_debug("Trying to access block beyond end of VAT (%d max %d)\n",
-			  block, UDF_SB_TYPEVIRT(sb,partition).s_num_entries);
+			  block, map->s_type_specific.s_virtual.s_num_entries);
 		return 0xFFFFFFFF;
 	}
 
@@ -64,10 +70,10 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
 		index = block % (sb->s_blocksize / sizeof(uint32_t));
 	} else {
 		newblock = 0;
-		index = UDF_SB_TYPEVIRT(sb,partition).s_start_offset / sizeof(uint32_t) + block;
+		index = map->s_type_specific.s_virtual.s_start_offset / sizeof(uint32_t) + block;
 	}
 
-	loc = udf_block_map(UDF_SB_VAT(sb), newblock);
+	loc = udf_block_map(sbi->s_vat_inode, newblock);
 
 	if (!(bh = sb_bread(sb, loc))) {
 		udf_debug("get_pblock(UDF_VIRTUAL_MAP:%p,%d,%d) VAT: %d[%d]\n",
@@ -79,13 +85,13 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
 
 	brelse(bh);
 
-	if (UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum == partition) {
+	if (UDF_I_LOCATION(sbi->s_vat_inode).partitionReferenceNum == partition) {
 		udf_debug("recursive call to udf_get_pblock!\n");
 		return 0xFFFFFFFF;
 	}
 
 	return udf_get_pblock(sb, loc,
-			      UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum,
+			      UDF_I_LOCATION(sbi->s_vat_inode).partitionReferenceNum,
 			      offset);
 }
 
@@ -95,16 +101,21 @@ inline uint32_t udf_get_pblock_virt20(struct super_block * sb, uint32_t block,
 	return udf_get_pblock_virt15(sb, block, partition, offset);
 }
 
-uint32_t udf_get_pblock_spar15(struct super_block * sb, uint32_t block,
+uint32_t udf_get_pblock_spar15(struct super_block *sb, uint32_t block,
 			       uint16_t partition, uint32_t offset)
 {
 	int i;
 	struct sparingTable *st = NULL;
-	uint32_t packet = (block + offset) & ~(UDF_SB_TYPESPAR(sb,partition).s_packet_len - 1);
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct udf_part_map *map;
+	uint32_t packet;
+
+	map = &sbi->s_partmaps[partition];
+	packet = (block + offset) & ~(map->s_type_specific.s_sparing.s_packet_len - 1);
 
 	for (i = 0; i < 4; i++) {
-		if (UDF_SB_TYPESPAR(sb,partition).s_spar_map[i] != NULL) {
-			st = (struct sparingTable *)UDF_SB_TYPESPAR(sb,partition).s_spar_map[i]->b_data;
+		if (map->s_type_specific.s_sparing.s_spar_map[i] != NULL) {
+			st = (struct sparingTable *)map->s_type_specific.s_sparing.s_spar_map[i]->b_data;
 			break;
 		}
 	}
@@ -115,14 +126,14 @@ uint32_t udf_get_pblock_spar15(struct super_block * sb, uint32_t block,
 				break;
 			} else if (le32_to_cpu(st->mapEntry[i].origLocation) == packet) {
 				return le32_to_cpu(st->mapEntry[i].mappedLocation) +
-					((block + offset) & (UDF_SB_TYPESPAR(sb,partition).s_packet_len - 1));
+					((block + offset) & (map->s_type_specific.s_sparing.s_packet_len - 1));
 			} else if (le32_to_cpu(st->mapEntry[i].origLocation) > packet) {
 				break;
 			}
 		}
 	}
 
-	return UDF_SB_PARTROOT(sb,partition) + block + offset;
+	return map->s_partition_root + block + offset;
 }
 
 int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
@@ -132,15 +143,17 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
 	struct sparingEntry mapEntry;
 	uint32_t packet;
 	int i, j, k, l;
+	struct udf_sb_info *sbi = UDF_SB(sb);
 
-	for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) {
-		if (old_block > UDF_SB_PARTROOT(sb,i) &&
-		    old_block < UDF_SB_PARTROOT(sb,i) + UDF_SB_PARTLEN(sb,i)) {
-			sdata = &UDF_SB_TYPESPAR(sb,i);
-			packet = (old_block - UDF_SB_PARTROOT(sb,i)) & ~(sdata->s_packet_len - 1);
+	for (i = 0; i < sbi->s_partitions; i++) {
+		struct udf_part_map *map = &sbi->s_partmaps[i];
+		if (old_block > map->s_partition_root &&
+		    old_block < map->s_partition_root + map->s_partition_len) {
+			sdata = &map->s_type_specific.s_sparing;
+			packet = (old_block - map->s_partition_root) & ~(sdata->s_packet_len - 1);
 
 			for (j = 0; j < 4; j++) {
-				if (UDF_SB_TYPESPAR(sb,i).s_spar_map[j] != NULL) {
+				if (map->s_type_specific.s_sparing.s_spar_map[j] != NULL) {
 					st = (struct sparingTable *)sdata->s_spar_map[j]->b_data;
 					break;
 				}
@@ -160,11 +173,11 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
 						}
 					}
 					*new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) +
-						((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1));
+						((old_block - map->s_partition_root) & (sdata->s_packet_len - 1));
 					return 0;
 				} else if (le32_to_cpu(st->mapEntry[k].origLocation) == packet) {
 					*new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) +
-						((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1));
+						((old_block - map->s_partition_root) & (sdata->s_packet_len - 1));
 					return 0;
 				} else if (le32_to_cpu(st->mapEntry[k].origLocation) > packet) {
 					break;
@@ -185,7 +198,7 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
 						}
 					}
 					*new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) +
-						((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1));
+						((old_block - map->s_partition_root) & (sdata->s_packet_len - 1));
 					return 0;
 				}
 			}
@@ -194,7 +207,7 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
 		} /* if old_block */
 	}
 
-	if (i == UDF_SB_NUMPARTS(sb)) {
+	if (i == sbi->s_partitions) {
 		/* outside of partitions */
 		/* for now, fail =) */
 		return 1;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 57788f1ba2da..0ca2deb5b992 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -95,6 +95,14 @@ static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
 static int udf_statfs(struct dentry *, struct kstatfs *);
 
+struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
+{
+	struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+	__u32 number_of_partitions = le32_to_cpu(lvid->numOfPartitions);
+	__u32 offset = number_of_partitions * 2 * sizeof(uint32_t)/sizeof(uint8_t);
+	return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
+}
+
 /* UDF filesystem type */
 static int udf_get_sb(struct file_system_type *fs_type,
 		      int flags, const char *dev_name, void *data,
@@ -461,22 +469,23 @@ void udf_write_super(struct super_block *sb)
 static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 {
 	struct udf_options uopt;
+	struct udf_sb_info *sbi = UDF_SB(sb);
 
-	uopt.flags = UDF_SB(sb)->s_flags;
-	uopt.uid   = UDF_SB(sb)->s_uid;
-	uopt.gid   = UDF_SB(sb)->s_gid;
-	uopt.umask = UDF_SB(sb)->s_umask;
+	uopt.flags = sbi->s_flags;
+	uopt.uid   = sbi->s_uid;
+	uopt.gid   = sbi->s_gid;
+	uopt.umask = sbi->s_umask;
 
 	if (!udf_parse_options(options, &uopt))
 		return -EINVAL;
 
-	UDF_SB(sb)->s_flags = uopt.flags;
-	UDF_SB(sb)->s_uid   = uopt.uid;
-	UDF_SB(sb)->s_gid   = uopt.gid;
-	UDF_SB(sb)->s_umask = uopt.umask;
+	sbi->s_flags = uopt.flags;
+	sbi->s_uid   = uopt.uid;
+	sbi->s_gid   = uopt.gid;
+	sbi->s_umask = uopt.umask;
 
-	if (UDF_SB_LVIDBH(sb)) {
-		int write_rev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev);
+	if (sbi->s_lvid_bh) {
+		int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
 		if (write_rev > UDF_MAX_WRITE_VERSION)
 			*flags |= MS_RDONLY;
 	}
@@ -538,17 +547,19 @@ static int udf_vrs(struct super_block *sb, int silent)
 	int iso9660 = 0;
 	int nsr02 = 0;
 	int nsr03 = 0;
+	struct udf_sb_info *sbi;
 
 	/* Block size must be a multiple of 512 */
 	if (sb->s_blocksize & 511)
 		return 0;
+	sbi = UDF_SB(sb);
 
 	if (sb->s_blocksize < sizeof(struct volStructDesc))
 		sectorsize = sizeof(struct volStructDesc);
 	else
 		sectorsize = sb->s_blocksize;
 
-	sector += (UDF_SB_SESSION(sb) << sb->s_blocksize_bits);
+	sector += (sbi->s_session << sb->s_blocksize_bits);
 
 	udf_debug("Starting at sector %u (%ld byte sectors)\n",
 		  (sector >> sb->s_blocksize_bits), sb->s_blocksize);
@@ -614,7 +625,7 @@ static int udf_vrs(struct super_block *sb, int silent)
 		return nsr03;
 	else if (nsr02)
 		return nsr02;
-	else if (sector - (UDF_SB_SESSION(sb) << sb->s_blocksize_bits) == 32768)
+	else if (sector - (sbi->s_session << sb->s_blocksize_bits) == 32768)
 		return -1;
 	else
 		return 0;
@@ -639,11 +650,15 @@ static int udf_vrs(struct super_block *sb, int silent)
  */
 static void udf_find_anchor(struct super_block *sb)
 {
-	int lastblock = UDF_SB_LASTBLOCK(sb);
+	int lastblock;
 	struct buffer_head *bh = NULL;
 	uint16_t ident;
 	uint32_t location;
 	int i;
+	struct udf_sb_info *sbi;
+
+	sbi = UDF_SB(sb);
+	lastblock = sbi->s_last_block;
 
 	if (lastblock) {
 		int varlastblock = udf_variable_to_fixed(lastblock);
@@ -675,22 +690,22 @@ static void udf_find_anchor(struct super_block *sb)
 			}
 
 			if (ident == TAG_IDENT_AVDP) {
-				if (location == last[i] - UDF_SB_SESSION(sb)) {
-					lastblock = last[i] - UDF_SB_SESSION(sb);
-					UDF_SB_ANCHOR(sb)[0] = lastblock;
-					UDF_SB_ANCHOR(sb)[1] = lastblock - 256;
-				} else if (location == udf_variable_to_fixed(last[i]) - UDF_SB_SESSION(sb)) {
+				if (location == last[i] - sbi->s_session) {
+					lastblock = last[i] - sbi->s_session;
+					sbi->s_anchor[0] = lastblock;
+					sbi->s_anchor[1] = lastblock - 256;
+				} else if (location == udf_variable_to_fixed(last[i]) - sbi->s_session) {
 					UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
-					lastblock = udf_variable_to_fixed(last[i]) - UDF_SB_SESSION(sb);
-					UDF_SB_ANCHOR(sb)[0] = lastblock;
-					UDF_SB_ANCHOR(sb)[1] = lastblock - 256 - UDF_SB_SESSION(sb);
+					lastblock = udf_variable_to_fixed(last[i]) - sbi->s_session;
+					sbi->s_anchor[0] = lastblock;
+					sbi->s_anchor[1] = lastblock - 256 - sbi->s_session;
 				} else {
 					udf_debug("Anchor found at block %d, location mismatch %d.\n",
 						  last[i], location);
 				}
 			} else if (ident == TAG_IDENT_FE || ident == TAG_IDENT_EFE) {
 				lastblock = last[i];
-				UDF_SB_ANCHOR(sb)[3] = 512;
+				sbi->s_anchor[3] = 512;
 			} else {
 				ident = location = 0;
 				if (last[i] >= 256) {
@@ -704,13 +719,13 @@ static void udf_find_anchor(struct super_block *sb)
 				}
 
 				if (ident == TAG_IDENT_AVDP &&
-				    location == last[i] - 256 - UDF_SB_SESSION(sb)) {
+				    location == last[i] - 256 - sbi->s_session) {
 					lastblock = last[i];
-					UDF_SB_ANCHOR(sb)[1] = last[i] - 256;
+					sbi->s_anchor[1] = last[i] - 256;
 				} else {
 					ident = location = 0;
-					if (last[i] >= 312 + UDF_SB_SESSION(sb)) {
-						bh = sb_bread(sb, last[i] - 312 - UDF_SB_SESSION(sb));
+					if (last[i] >= 312 + sbi->s_session) {
+						bh = sb_bread(sb, last[i] - 312 - sbi->s_session);
 						if (bh) {
 							tag *t = (tag *)bh->b_data;
 							ident = le16_to_cpu(t->tagIdent);
@@ -723,7 +738,7 @@ static void udf_find_anchor(struct super_block *sb)
 					    location == udf_variable_to_fixed(last[i]) - 256) {
 						UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
 						lastblock = udf_variable_to_fixed(last[i]);
-						UDF_SB_ANCHOR(sb)[1] = lastblock - 256;
+						sbi->s_anchor[1] = lastblock - 256;
 					}
 				}
 			}
@@ -732,7 +747,7 @@ static void udf_find_anchor(struct super_block *sb)
 
 	if (!lastblock) {
 		/* We haven't found the lastblock. check 312 */
-		bh = sb_bread(sb, 312 + UDF_SB_SESSION(sb));
+		bh = sb_bread(sb, 312 + sbi->s_session);
 		if (bh) {
 			tag *t = (tag *)bh->b_data;
 			ident = le16_to_cpu(t->tagIdent);
@@ -744,22 +759,22 @@ static void udf_find_anchor(struct super_block *sb)
 		}
 	}
 
-	for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) {
-		if (UDF_SB_ANCHOR(sb)[i]) {
-			bh = udf_read_tagged(sb, UDF_SB_ANCHOR(sb)[i],
-					     UDF_SB_ANCHOR(sb)[i], &ident);
+	for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) {
+		if (sbi->s_anchor[i]) {
+			bh = udf_read_tagged(sb, sbi->s_anchor[i],
+					     sbi->s_anchor[i], &ident);
 			if (!bh)
-				UDF_SB_ANCHOR(sb)[i] = 0;
+				sbi->s_anchor[i] = 0;
 			else {
 				brelse(bh);
 				if ((ident != TAG_IDENT_AVDP) &&
 				    (i || (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE)))
-					UDF_SB_ANCHOR(sb)[i] = 0;
+					sbi->s_anchor[i] = 0;
 			}
 		}
 	}
 
-	UDF_SB_LASTBLOCK(sb) = lastblock;
+	sbi->s_last_block = lastblock;
 }
 
 static int udf_find_fileset(struct super_block *sb,
@@ -769,6 +784,7 @@ static int udf_find_fileset(struct super_block *sb,
 	struct buffer_head *bh = NULL;
 	long lastblock;
 	uint16_t ident;
+	struct udf_sb_info *sbi;
 
 	if (fileset->logicalBlockNum != 0xFFFFFFFF ||
 	    fileset->partitionReferenceNum != 0xFFFF) {
@@ -783,6 +799,7 @@ static int udf_find_fileset(struct super_block *sb,
 
 	}
 
+	sbi = UDF_SB(sb);
 	if (!bh) {
 		/* Search backwards through the partitions */
 		kernel_lb_addr newfileset;
@@ -790,13 +807,14 @@ static int udf_find_fileset(struct super_block *sb,
 /* --> cvg: FIXME - is it reasonable? */
 		return 1;
 
-		for (newfileset.partitionReferenceNum = UDF_SB_NUMPARTS(sb) - 1;
+		for (newfileset.partitionReferenceNum = sbi->s_partitions - 1;
 		     (newfileset.partitionReferenceNum != 0xFFFF &&
 		      fileset->logicalBlockNum == 0xFFFFFFFF &&
 		      fileset->partitionReferenceNum == 0xFFFF);
 		     newfileset.partitionReferenceNum--) {
-			lastblock = UDF_SB_PARTLEN(sb,
-					newfileset.partitionReferenceNum);
+			lastblock = sbi->s_partmaps
+					[newfileset.partitionReferenceNum]
+						.s_partition_len;
 			newfileset.logicalBlockNum = 0;
 
 			do {
@@ -840,7 +858,7 @@ static int udf_find_fileset(struct super_block *sb,
 			  fileset->logicalBlockNum,
 			  fileset->partitionReferenceNum);
 
-		UDF_SB_PARTITION(sb) = fileset->partitionReferenceNum;
+		sbi->s_partition = fileset->partitionReferenceNum;
 		udf_load_fileset(sb, bh, root);
 		brelse(bh);
 		return 0;
@@ -867,15 +885,15 @@ static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh)
 			  recording, recording_usec,
 			  ts.year, ts.month, ts.day, ts.hour,
 			  ts.minute, ts.typeAndTimezone);
-		UDF_SB_RECORDTIME(sb).tv_sec = recording;
-		UDF_SB_RECORDTIME(sb).tv_nsec = recording_usec * 1000;
+		UDF_SB(sb)->s_record_time.tv_sec = recording;
+		UDF_SB(sb)->s_record_time.tv_nsec = recording_usec * 1000;
 	}
 
 	if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) {
 		if (udf_CS0toUTF8(&outstr, &instr)) {
-			strncpy(UDF_SB_VOLIDENT(sb), outstr.u_name,
+			strncpy(UDF_SB(sb)->s_volume_ident, outstr.u_name,
 				outstr.u_len > 31 ? 31 : outstr.u_len);
-			udf_debug("volIdent[] = '%s'\n", UDF_SB_VOLIDENT(sb));
+			udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);
 		}
 	}
 
@@ -894,7 +912,7 @@ static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
 
 	*root = lelb_to_cpu(fset->rootDirectoryICB.extLocation);
 
-	UDF_SB_SERIALNUM(sb) = le16_to_cpu(fset->descTag.tagSerialNum);
+	UDF_SB(sb)->s_serial_number = le16_to_cpu(fset->descTag.tagSerialNum);
 
 	udf_debug("Rootdir at block=%d, partition=%d\n",
 		  root->logicalBlockNum, root->partitionReferenceNum);
@@ -904,23 +922,27 @@ static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 {
 	struct partitionDesc *p;
 	int i;
+	struct udf_part_map *map;
+	struct udf_sb_info *sbi;
 
 	p = (struct partitionDesc *)bh->b_data;
+	sbi = UDF_SB(sb);
 
-	for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) {
+	for (i = 0; i < sbi->s_partitions; i++) {
+		map = &sbi->s_partmaps[i];
 		udf_debug("Searching map: (%d == %d)\n",
-			  UDF_SB_PARTMAPS(sb)[i].s_partition_num, le16_to_cpu(p->partitionNumber));
-		if (UDF_SB_PARTMAPS(sb)[i].s_partition_num == le16_to_cpu(p->partitionNumber)) {
-			UDF_SB_PARTLEN(sb, i) = le32_to_cpu(p->partitionLength); /* blocks */
-			UDF_SB_PARTROOT(sb, i) = le32_to_cpu(p->partitionStartingLocation);
+			  map->s_partition_num, le16_to_cpu(p->partitionNumber));
+		if (map->s_partition_num == le16_to_cpu(p->partitionNumber)) {
+			map->s_partition_len = le32_to_cpu(p->partitionLength); /* blocks */
+			map->s_partition_root = le32_to_cpu(p->partitionStartingLocation);
 			if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_READ_ONLY)
-				UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_READ_ONLY;
+				map->s_partition_flags |= UDF_PART_FLAG_READ_ONLY;
 			if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_WRITE_ONCE)
-				UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_WRITE_ONCE;
+				map->s_partition_flags |= UDF_PART_FLAG_WRITE_ONCE;
 			if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_REWRITABLE)
-				UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_REWRITABLE;
+				map->s_partition_flags |= UDF_PART_FLAG_REWRITABLE;
 			if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_OVERWRITABLE)
-				UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_OVERWRITABLE;
+				map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE;
 
 			if (!strcmp(p->partitionContents.ident,
 				    PD_PARTITION_CONTENTS_NSR02) ||
@@ -935,26 +957,26 @@ static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 						.partitionReferenceNum = i,
 					};
 
-					UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table =
+					map->s_uspace.s_table =
 						udf_iget(sb, loc);
-					if (!UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table) {
+					if (!map->s_uspace.s_table) {
 						udf_debug("cannot load unallocSpaceTable (part %d)\n", i);
 						return 1;
 					}
-					UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_UNALLOC_TABLE;
+					map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
 					udf_debug("unallocSpaceTable (part %d) @ %ld\n",
-						  i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table->i_ino);
+						  i, map->s_uspace.s_table->i_ino);
 				}
 				if (phd->unallocSpaceBitmap.extLength) {
 					UDF_SB_ALLOC_BITMAP(sb, i, s_uspace);
-					if (UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap != NULL) {
-						UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extLength =
+					if (map->s_uspace.s_bitmap != NULL) {
+						map->s_uspace.s_bitmap->s_extLength =
 							le32_to_cpu(phd->unallocSpaceBitmap.extLength);
-						UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extPosition =
+						map->s_uspace.s_bitmap->s_extPosition =
 							le32_to_cpu(phd->unallocSpaceBitmap.extPosition);
-						UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_UNALLOC_BITMAP;
+						map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
 						udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
-							  i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extPosition);
+							  i, map->s_uspace.s_bitmap->s_extPosition);
 					}
 				}
 				if (phd->partitionIntegrityTable.extLength)
@@ -965,41 +987,42 @@ static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
 						.partitionReferenceNum = i,
 					};
 
-					UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table =
+					map->s_fspace.s_table =
 						udf_iget(sb, loc);
-					if (!UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table) {
+					if (!map->s_fspace.s_table) {
 						udf_debug("cannot load freedSpaceTable (part %d)\n", i);
 						return 1;
 					}
-					UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_FREED_TABLE;
+					map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
 					udf_debug("freedSpaceTable (part %d) @ %ld\n",
-						  i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table->i_ino);
+						  i, map->s_fspace.s_table->i_ino);
 				}
 				if (phd->freedSpaceBitmap.extLength) {
 					UDF_SB_ALLOC_BITMAP(sb, i, s_fspace);
-					if (UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap != NULL) {
-						UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extLength =
+					if (map->s_fspace.s_bitmap != NULL) {
+						map->s_fspace.s_bitmap->s_extLength =
 							le32_to_cpu(phd->freedSpaceBitmap.extLength);
-						UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extPosition =
+						map->s_fspace.s_bitmap->s_extPosition =
 							le32_to_cpu(phd->freedSpaceBitmap.extPosition);
-						UDF_SB_PARTFLAGS(sb, i) |= UDF_PART_FLAG_FREED_BITMAP;
+						map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
 						udf_debug("freedSpaceBitmap (part %d) @ %d\n",
-							  i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extPosition);
+							  i, map->s_fspace.s_bitmap->s_extPosition);
 					}
 				}
 			}
 			break;
 		}
 	}
-	if (i == UDF_SB_NUMPARTS(sb)) {
+	if (i == sbi->s_partitions) {
 		udf_debug("Partition (%d) not found in partition map\n",
 			  le16_to_cpu(p->partitionNumber));
 	} else {
 		udf_debug("Partition (%d:%d type %x) starts at physical %d, "
 			  "block length %d\n",
 			  le16_to_cpu(p->partitionNumber), i,
-			  UDF_SB_PARTTYPE(sb, i), UDF_SB_PARTROOT(sb, i),
-			  UDF_SB_PARTLEN(sb, i));
+			  map->s_partition_type,
+			  map->s_partition_root,
+			  map->s_partition_len);
 	}
 	return 0;
 }
@@ -1010,30 +1033,32 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh,
 	struct logicalVolDesc *lvd;
 	int i, j, offset;
 	uint8_t type;
+	struct udf_sb_info *sbi = UDF_SB(sb);
 
 	lvd = (struct logicalVolDesc *)bh->b_data;
 
 	UDF_SB_ALLOC_PARTMAPS(sb, le32_to_cpu(lvd->numPartitionMaps));
 
 	for (i = 0, offset = 0;
-	     i < UDF_SB_NUMPARTS(sb) && offset < le32_to_cpu(lvd->mapTableLength);
+	     i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength);
 	     i++, offset += ((struct genericPartitionMap *)&(lvd->partitionMaps[offset]))->partitionMapLength) {
+	     	struct udf_part_map *map = &sbi->s_partmaps[i];
 		type = ((struct genericPartitionMap *)&(lvd->partitionMaps[offset]))->partitionMapType;
 		if (type == 1) {
 			struct genericPartitionMap1 *gpm1 = (struct genericPartitionMap1 *)&(lvd->partitionMaps[offset]);
-			UDF_SB_PARTTYPE(sb, i) = UDF_TYPE1_MAP15;
-			UDF_SB_PARTVSN(sb, i) = le16_to_cpu(gpm1->volSeqNum);
-			UDF_SB_PARTNUM(sb, i) = le16_to_cpu(gpm1->partitionNum);
-			UDF_SB_PARTFUNC(sb, i) = NULL;
+			map->s_partition_type = UDF_TYPE1_MAP15;
+			map->s_volumeseqnum = le16_to_cpu(gpm1->volSeqNum);
+			map->s_partition_num = le16_to_cpu(gpm1->partitionNum);
+			map->s_partition_func = NULL;
 		} else if (type == 2) {
 			struct udfPartitionMap2 *upm2 = (struct udfPartitionMap2 *)&(lvd->partitionMaps[offset]);
 			if (!strncmp(upm2->partIdent.ident, UDF_ID_VIRTUAL, strlen(UDF_ID_VIRTUAL))) {
 				if (le16_to_cpu(((__le16 *)upm2->partIdent.identSuffix)[0]) == 0x0150) {
-					UDF_SB_PARTTYPE(sb, i) = UDF_VIRTUAL_MAP15;
-					UDF_SB_PARTFUNC(sb, i) = udf_get_pblock_virt15;
+					map->s_partition_type = UDF_VIRTUAL_MAP15;
+					map->s_partition_func = udf_get_pblock_virt15;
 				} else if (le16_to_cpu(((__le16 *)upm2->partIdent.identSuffix)[0]) == 0x0200) {
-					UDF_SB_PARTTYPE(sb, i) = UDF_VIRTUAL_MAP20;
-					UDF_SB_PARTFUNC(sb, i) = udf_get_pblock_virt20;
+					map->s_partition_type = UDF_VIRTUAL_MAP20;
+					map->s_partition_func = udf_get_pblock_virt20;
 				}
 			} else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) {
 				uint32_t loc;
@@ -1041,33 +1066,33 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh,
 				struct sparingTable *st;
 				struct sparablePartitionMap *spm = (struct sparablePartitionMap *)&(lvd->partitionMaps[offset]);
 
-				UDF_SB_PARTTYPE(sb, i) = UDF_SPARABLE_MAP15;
-				UDF_SB_TYPESPAR(sb, i).s_packet_len = le16_to_cpu(spm->packetLength);
+				map->s_partition_type = UDF_SPARABLE_MAP15;
+				map->s_type_specific.s_sparing.s_packet_len = le16_to_cpu(spm->packetLength);
 				for (j = 0; j < spm->numSparingTables; j++) {
 					loc = le32_to_cpu(spm->locSparingTable[j]);
-					UDF_SB_TYPESPAR(sb, i).s_spar_map[j] =
+					map->s_type_specific.s_sparing.s_spar_map[j] =
 						udf_read_tagged(sb, loc, loc, &ident);
-					if (UDF_SB_TYPESPAR(sb, i).s_spar_map[j] != NULL) {
-						st = (struct sparingTable *)UDF_SB_TYPESPAR(sb, i).s_spar_map[j]->b_data;
+					if (map->s_type_specific.s_sparing.s_spar_map[j] != NULL) {
+						st = (struct sparingTable *)map->s_type_specific.s_sparing.s_spar_map[j]->b_data;
 						if (ident != 0 ||
 						    strncmp(st->sparingIdent.ident, UDF_ID_SPARING, strlen(UDF_ID_SPARING))) {
-							brelse(UDF_SB_TYPESPAR(sb, i).s_spar_map[j]);
-							UDF_SB_TYPESPAR(sb, i).s_spar_map[j] = NULL;
+							brelse(map->s_type_specific.s_sparing.s_spar_map[j]);
+							map->s_type_specific.s_sparing.s_spar_map[j] = NULL;
 						}
 					}
 				}
-				UDF_SB_PARTFUNC(sb, i) = udf_get_pblock_spar15;
+				map->s_partition_func = udf_get_pblock_spar15;
 			} else {
 				udf_debug("Unknown ident: %s\n",
 					  upm2->partIdent.ident);
 				continue;
 			}
-			UDF_SB_PARTVSN(sb, i) = le16_to_cpu(upm2->volSeqNum);
-			UDF_SB_PARTNUM(sb, i) = le16_to_cpu(upm2->partitionNum);
+			map->s_volumeseqnum = le16_to_cpu(upm2->volSeqNum);
+			map->s_partition_num = le16_to_cpu(upm2->partitionNum);
 		}
 		udf_debug("Partition (%d:%d) type %d on volume %d\n",
-			  i, UDF_SB_PARTNUM(sb, i), type,
-			  UDF_SB_PARTVSN(sb, i));
+			  i, map->s_partition_num, type,
+			  map->s_volumeseqnum);
 	}
 
 	if (fileset) {
@@ -1092,23 +1117,26 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
 {
 	struct buffer_head *bh = NULL;
 	uint16_t ident;
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct logicalVolIntegrityDesc *lvid;
 
 	while (loc.extLength > 0 &&
 	       (bh = udf_read_tagged(sb, loc.extLocation,
 				     loc.extLocation, &ident)) &&
 	       ident == TAG_IDENT_LVID) {
-		UDF_SB_LVIDBH(sb) = bh;
+		sbi->s_lvid_bh = bh;
+		lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
 
-		if (UDF_SB_LVID(sb)->nextIntegrityExt.extLength)
+		if (lvid->nextIntegrityExt.extLength)
 			udf_load_logicalvolint(sb,
-				leea_to_cpu(UDF_SB_LVID(sb)->nextIntegrityExt));
+				leea_to_cpu(lvid->nextIntegrityExt));
 
-		if (UDF_SB_LVIDBH(sb) != bh)
+		if (sbi->s_lvid_bh != bh)
 			brelse(bh);
 		loc.extLength -= sb->s_blocksize;
 		loc.extLocation++;
 	}
-	if (UDF_SB_LVIDBH(sb) != bh)
+	if (sbi->s_lvid_bh != bh)
 		brelse(bh);
 }
 
@@ -1259,10 +1287,11 @@ static int udf_check_valid(struct super_block *sb, int novrs, int silent)
 	else {
 		block = udf_vrs(sb, silent);
 		if (block == -1) {
+			struct udf_sb_info *sbi = UDF_SB(sb);
 			udf_debug("Failed to read byte 32768. Assuming open "
 				  "disc. Skipping validity check\n");
-			if (!UDF_SB_LASTBLOCK(sb))
-				UDF_SB_LASTBLOCK(sb) = udf_get_last_block(sb);
+			if (!sbi->s_last_block)
+				sbi->s_last_block = udf_get_last_block(sb);
 			return 0;
 		} else
 			return !block;
@@ -1276,14 +1305,16 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 	struct buffer_head *bh;
 	long main_s, main_e, reserve_s, reserve_e;
 	int i, j;
+	struct udf_sb_info *sbi;
 
 	if (!sb)
 		return 1;
+	sbi = UDF_SB(sb);
 
-	for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) {
-		if (UDF_SB_ANCHOR(sb)[i] &&
-		    (bh = udf_read_tagged(sb, UDF_SB_ANCHOR(sb)[i],
-					  UDF_SB_ANCHOR(sb)[i], &ident))) {
+	for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) {
+		if (sbi->s_anchor[i] &&
+		    (bh = udf_read_tagged(sb, sbi->s_anchor[i],
+					  sbi->s_anchor[i], &ident))) {
 			anchor = (struct anchorVolDescPtr *)bh->b_data;
 
 			/* Locate the main sequence */
@@ -1308,68 +1339,72 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 		}
 	}
 
-	if (i == ARRAY_SIZE(UDF_SB_ANCHOR(sb))) {
+	if (i == ARRAY_SIZE(sbi->s_anchor)) {
 		udf_debug("No Anchor block found\n");
 		return 1;
 	} else
-		udf_debug("Using anchor in block %d\n", UDF_SB_ANCHOR(sb)[i]);
+		udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]);
 
-	for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) {
+	for (i = 0; i < sbi->s_partitions; i++) {
 		kernel_lb_addr uninitialized_var(ino);
-		switch (UDF_SB_PARTTYPE(sb, i)) {
+		struct udf_part_map *map = &sbi->s_partmaps[i];
+		switch (map->s_partition_type) {
 		case UDF_VIRTUAL_MAP15:
 		case UDF_VIRTUAL_MAP20:
-			if (!UDF_SB_LASTBLOCK(sb)) {
-				UDF_SB_LASTBLOCK(sb) = udf_get_last_block(sb);
+			if (!sbi->s_last_block) {
+				sbi->s_last_block = udf_get_last_block(sb);
 				udf_find_anchor(sb);
 			}
 
-			if (!UDF_SB_LASTBLOCK(sb)) {
+			if (!sbi->s_last_block) {
 				udf_debug("Unable to determine Lastblock (For "
 					  "Virtual Partition)\n");
 				return 1;
 			}
 
-			for (j = 0; j < UDF_SB_NUMPARTS(sb); j++) {
+			for (j = 0; j < sbi->s_partitions; j++) {
+				struct udf_part_map *map2 = &sbi->s_partmaps[j];
 				if (j != i &&
-				    UDF_SB_PARTVSN(sb, i) == UDF_SB_PARTVSN(sb, j) &&
-				    UDF_SB_PARTNUM(sb, i) == UDF_SB_PARTNUM(sb, j)) {
+				    map->s_volumeseqnum == map2->s_volumeseqnum &&
+				    map->s_partition_num == map2->s_partition_num) {
 					ino.partitionReferenceNum = j;
-					ino.logicalBlockNum = UDF_SB_LASTBLOCK(sb) - UDF_SB_PARTROOT(sb, j);
+					ino.logicalBlockNum = sbi->s_last_block - map2->s_partition_root;
 					break;
 				}
 			}
 
-			if (j == UDF_SB_NUMPARTS(sb))
+			if (j == sbi->s_partitions)
 				return 1;
 
-			UDF_SB_VAT(sb) = udf_iget(sb, ino);
-			if (!UDF_SB_VAT(sb))
+			sbi->s_vat_inode = udf_iget(sb, ino);
+			if (!sbi->s_vat_inode)
 				return 1;
 
-			if (UDF_SB_PARTTYPE(sb, i) == UDF_VIRTUAL_MAP15) {
-				UDF_SB_TYPEVIRT(sb, i).s_start_offset =
-					udf_ext0_offset(UDF_SB_VAT(sb));
-				UDF_SB_TYPEVIRT(sb, i).s_num_entries =
-					(UDF_SB_VAT(sb)->i_size - 36) >> 2;
-			} else if (UDF_SB_PARTTYPE(sb, i) == UDF_VIRTUAL_MAP20) {
+			if (map->s_partition_type == UDF_VIRTUAL_MAP15) {
+				map->s_type_specific.s_virtual.s_start_offset =
+					udf_ext0_offset(sbi->s_vat_inode);
+				map->s_type_specific.s_virtual.s_num_entries =
+					(sbi->s_vat_inode->i_size - 36) >> 2;
+			} else if (map->s_partition_type == UDF_VIRTUAL_MAP20) {
 				struct buffer_head *bh = NULL;
 				uint32_t pos;
 
-				pos = udf_block_map(UDF_SB_VAT(sb), 0);
+				pos = udf_block_map(sbi->s_vat_inode, 0);
 				bh = sb_bread(sb, pos);
 				if (!bh)
 					return 1;
-				UDF_SB_TYPEVIRT(sb, i).s_start_offset =
+				map->s_type_specific.s_virtual.s_start_offset =
 					le16_to_cpu(((struct virtualAllocationTable20 *)bh->b_data +
-						     udf_ext0_offset(UDF_SB_VAT(sb)))->lengthHeader) +
-					udf_ext0_offset(UDF_SB_VAT(sb));
-				UDF_SB_TYPEVIRT(sb, i).s_num_entries = (UDF_SB_VAT(sb)->i_size -
-									UDF_SB_TYPEVIRT(sb, i).s_start_offset) >> 2;
+						     udf_ext0_offset(sbi->s_vat_inode))->lengthHeader) +
+					udf_ext0_offset(sbi->s_vat_inode);
+				map->s_type_specific.s_virtual.s_num_entries = (sbi->s_vat_inode->i_size -
+									map->s_type_specific.s_virtual.s_start_offset) >> 2;
 				brelse(bh);
 			}
-			UDF_SB_PARTROOT(sb, i) = udf_get_pblock(sb, 0, i, 0);
-			UDF_SB_PARTLEN(sb, i) = UDF_SB_PARTLEN(sb, ino.partitionReferenceNum);
+			map->s_partition_root = udf_get_pblock(sb, 0, i, 0);
+			map->s_partition_len =
+				sbi->s_partmaps[ino.partitionReferenceNum].
+								s_partition_len;
 		}
 	}
 	return 0;
@@ -1377,26 +1412,30 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
 
 static void udf_open_lvid(struct super_block *sb)
 {
-	if (UDF_SB_LVIDBH(sb)) {
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct buffer_head *bh = sbi->s_lvid_bh;
+	if (bh) {
 		int i;
 		kernel_timestamp cpu_time;
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+		struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sbi);
 
-		UDF_SB_LVIDIU(sb)->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
-		UDF_SB_LVIDIU(sb)->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
+		lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
+		lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
 		if (udf_time_to_stamp(&cpu_time, CURRENT_TIME))
-			UDF_SB_LVID(sb)->recordingDateAndTime = cpu_to_lets(cpu_time);
-		UDF_SB_LVID(sb)->integrityType = LVID_INTEGRITY_TYPE_OPEN;
+			lvid->recordingDateAndTime = cpu_to_lets(cpu_time);
+		lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN;
 
-		UDF_SB_LVID(sb)->descTag.descCRC = cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag),
-								       le16_to_cpu(UDF_SB_LVID(sb)->descTag.descCRCLength), 0));
+		lvid->descTag.descCRC = cpu_to_le16(udf_crc((char *)lvid + sizeof(tag),
+								       le16_to_cpu(lvid->descTag.descCRCLength), 0));
 
-		UDF_SB_LVID(sb)->descTag.tagChecksum = 0;
+		lvid->descTag.tagChecksum = 0;
 		for (i = 0; i < 16; i++)
 			if (i != 4)
-				UDF_SB_LVID(sb)->descTag.tagChecksum +=
-					((uint8_t *) &(UDF_SB_LVID(sb)->descTag))[i];
+				lvid->descTag.tagChecksum +=
+					((uint8_t *) &(lvid->descTag))[i];
 
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+		mark_buffer_dirty(bh);
 	}
 }
 
@@ -1404,32 +1443,40 @@ static void udf_close_lvid(struct super_block *sb)
 {
 	kernel_timestamp cpu_time;
 	int i;
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct buffer_head *bh = sbi->s_lvid_bh;
+	struct logicalVolIntegrityDesc *lvid;
 
-	if (UDF_SB_LVIDBH(sb) &&
-	    UDF_SB_LVID(sb)->integrityType == LVID_INTEGRITY_TYPE_OPEN) {
-		UDF_SB_LVIDIU(sb)->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
-		UDF_SB_LVIDIU(sb)->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
+	if (!bh)
+		return;
+
+	lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+
+	if (lvid->integrityType == LVID_INTEGRITY_TYPE_OPEN) {
+		struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sbi);
+		lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
+		lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
 		if (udf_time_to_stamp(&cpu_time, CURRENT_TIME))
-			UDF_SB_LVID(sb)->recordingDateAndTime = cpu_to_lets(cpu_time);
-		if (UDF_MAX_WRITE_VERSION > le16_to_cpu(UDF_SB_LVIDIU(sb)->maxUDFWriteRev))
-			UDF_SB_LVIDIU(sb)->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION);
-		if (UDF_SB_UDFREV(sb) > le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev))
-			UDF_SB_LVIDIU(sb)->minUDFReadRev = cpu_to_le16(UDF_SB_UDFREV(sb));
-		if (UDF_SB_UDFREV(sb) > le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev))
-			UDF_SB_LVIDIU(sb)->minUDFWriteRev = cpu_to_le16(UDF_SB_UDFREV(sb));
-		UDF_SB_LVID(sb)->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
-
-		UDF_SB_LVID(sb)->descTag.descCRC =
-			cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag),
-					    le16_to_cpu(UDF_SB_LVID(sb)->descTag.descCRCLength), 0));
-
-		UDF_SB_LVID(sb)->descTag.tagChecksum = 0;
+			lvid->recordingDateAndTime = cpu_to_lets(cpu_time);
+		if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev))
+			lvidiu->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION);
+		if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev))
+			lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev);
+		if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev))
+			lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev);
+		lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE);
+
+		lvid->descTag.descCRC =
+			cpu_to_le16(udf_crc((char *)lvid + sizeof(tag),
+					    le16_to_cpu(lvid->descTag.descCRCLength), 0));
+
+		lvid->descTag.tagChecksum = 0;
 		for (i = 0; i < 16; i++)
 			if (i != 4)
-				UDF_SB_LVID(sb)->descTag.tagChecksum +=
-					((uint8_t *)&(UDF_SB_LVID(sb)->descTag))[i];
+				lvid->descTag.tagChecksum +=
+					((uint8_t *)&(lvid->descTag))[i];
 
-		mark_buffer_dirty(UDF_SB_LVIDBH(sb));
+		mark_buffer_dirty(bh);
 	}
 }
 
@@ -1462,12 +1509,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	uopt.gid = -1;
 	uopt.umask = 0;
 
-	sbi = kmalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
+	sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
 	if (!sbi)
 		return -ENOMEM;
 
 	sb->s_fs_info = sbi;
-	memset(UDF_SB(sb), 0x00, sizeof(struct udf_sb_info));
 
 	mutex_init(&sbi->s_alloc_mutex);
 
@@ -1495,27 +1541,27 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	fileset.logicalBlockNum = 0xFFFFFFFF;
 	fileset.partitionReferenceNum = 0xFFFF;
 
-	UDF_SB(sb)->s_flags = uopt.flags;
-	UDF_SB(sb)->s_uid = uopt.uid;
-	UDF_SB(sb)->s_gid = uopt.gid;
-	UDF_SB(sb)->s_umask = uopt.umask;
-	UDF_SB(sb)->s_nls_map = uopt.nls_map;
+	sbi->s_flags = uopt.flags;
+	sbi->s_uid = uopt.uid;
+	sbi->s_gid = uopt.gid;
+	sbi->s_umask = uopt.umask;
+	sbi->s_nls_map = uopt.nls_map;
 
 	/* Set the block size for all transfers */
 	if (!udf_set_blocksize(sb, uopt.blocksize))
 		goto error_out;
 
 	if (uopt.session == 0xFFFFFFFF)
-		UDF_SB_SESSION(sb) = udf_get_last_session(sb);
+		sbi->s_session = udf_get_last_session(sb);
 	else
-		UDF_SB_SESSION(sb) = uopt.session;
+		sbi->s_session = uopt.session;
 
-	udf_debug("Multi-session=%d\n", UDF_SB_SESSION(sb));
+	udf_debug("Multi-session=%d\n", sbi->s_session);
 
-	UDF_SB_LASTBLOCK(sb) = uopt.lastblock;
-	UDF_SB_ANCHOR(sb)[0] = UDF_SB_ANCHOR(sb)[1] = 0;
-	UDF_SB_ANCHOR(sb)[2] = uopt.anchor;
-	UDF_SB_ANCHOR(sb)[3] = 256;
+	sbi->s_last_block = uopt.lastblock;
+	sbi->s_anchor[0] = sbi->s_anchor[1] = 0;
+	sbi->s_anchor[2] = uopt.anchor;
+	sbi->s_anchor[3] = 256;
 
 	if (udf_check_valid(sb, uopt.novrs, silent)) {
 		/* read volume recognition sequences */
@@ -1537,23 +1583,24 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 		goto error_out;
 	}
 
-	udf_debug("Lastblock=%d\n", UDF_SB_LASTBLOCK(sb));
+	udf_debug("Lastblock=%d\n", sbi->s_last_block);
 
-	if (UDF_SB_LVIDBH(sb)) {
-		uint16_t minUDFReadRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev);
-		uint16_t minUDFWriteRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev);
-		/* uint16_t maxUDFWriteRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->maxUDFWriteRev); */
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sbi);
+		uint16_t minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev);
+		uint16_t minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev);
+		/* uint16_t maxUDFWriteRev = le16_to_cpu(lvidiu->maxUDFWriteRev); */
 
 		if (minUDFReadRev > UDF_MAX_READ_VERSION) {
 			printk(KERN_ERR "UDF-fs: minUDFReadRev=%x (max is %x)\n",
-			       le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev),
+			       le16_to_cpu(lvidiu->minUDFReadRev),
 			       UDF_MAX_READ_VERSION);
 			goto error_out;
 		} else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) {
 			sb->s_flags |= MS_RDONLY;
 		}
 
-		UDF_SB_UDFREV(sb) = minUDFWriteRev;
+		sbi->s_udfrev = minUDFWriteRev;
 
 		if (minUDFReadRev >= UDF_VERS_USE_EXTENDED_FE)
 			UDF_SET_FLAG(sb, UDF_FLAG_USE_EXTENDED_FE);
@@ -1561,12 +1608,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 			UDF_SET_FLAG(sb, UDF_FLAG_USE_STREAMS);
 	}
 
-	if (!UDF_SB_NUMPARTS(sb)) {
+	if (!sbi->s_partitions) {
 		printk(KERN_WARNING "UDF-fs: No partition found (2)\n");
 		goto error_out;
 	}
 
-	if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_READ_ONLY) {
+	if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & UDF_PART_FLAG_READ_ONLY) {
 		printk(KERN_NOTICE "UDF-fs: Partition marked readonly; forcing readonly mount\n");
 		sb->s_flags |= MS_RDONLY;
 	}
@@ -1578,12 +1625,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 
 	if (!silent) {
 		kernel_timestamp ts;
-		udf_time_to_stamp(&ts, UDF_SB_RECORDTIME(sb));
+		udf_time_to_stamp(&ts, sbi->s_record_time);
 		udf_info("UDF %s (%s) Mounting volume '%s', "
 			 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
 			 UDFFS_VERSION, UDFFS_DATE,
-			 UDF_SB_VOLIDENT(sb), ts.year, ts.month, ts.day, ts.hour, ts.minute,
-			 ts.typeAndTimezone);
+			 sbi->s_volume_ident, ts.year, ts.month, ts.day,
+			 ts.hour, ts.minute, ts.typeAndTimezone);
 	}
 	if (!(sb->s_flags & MS_RDONLY))
 		udf_open_lvid(sb);
@@ -1609,30 +1656,31 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	return 0;
 
 error_out:
-	if (UDF_SB_VAT(sb))
-		iput(UDF_SB_VAT(sb));
-	if (UDF_SB_NUMPARTS(sb)) {
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE)
-			iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table);
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE)
-			iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table);
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP)
-			UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_uspace);
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP)
-			UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_fspace);
-		if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) {
+	if (sbi->s_vat_inode)
+		iput(sbi->s_vat_inode);
+	if (sbi->s_partitions) {
+		struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition];
+		if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
+			iput(map->s_uspace.s_table);
+		if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
+			iput(map->s_fspace.s_table);
+		if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
+			UDF_SB_FREE_BITMAP(sb, sbi->s_partition, s_uspace);
+		if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
+			UDF_SB_FREE_BITMAP(sb, sbi->s_partition, s_fspace);
+		if (map->s_partition_type == UDF_SPARABLE_MAP15)
 			for (i = 0; i < 4; i++)
-				brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]);
-		}
+				brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
 	}
 #ifdef CONFIG_UDF_NLS
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
-		unload_nls(UDF_SB(sb)->s_nls_map);
+		unload_nls(sbi->s_nls_map);
 #endif
 	if (!(sb->s_flags & MS_RDONLY))
 		udf_close_lvid(sb);
-	brelse(UDF_SB_LVIDBH(sb));
-	UDF_SB_FREE(sb);
+	brelse(sbi->s_lvid_bh);
+
+	kfree(sbi->s_partmaps);
 	kfree(sbi);
 	sb->s_fs_info = NULL;
 
@@ -1683,31 +1731,33 @@ void udf_warning(struct super_block *sb, const char *function,
 static void udf_put_super(struct super_block *sb)
 {
 	int i;
+	struct udf_sb_info *sbi;
 
-	if (UDF_SB_VAT(sb))
-		iput(UDF_SB_VAT(sb));
-	if (UDF_SB_NUMPARTS(sb)) {
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE)
-			iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table);
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE)
-			iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table);
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP)
-			UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_uspace);
-		if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP)
-			UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_fspace);
-		if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) {
+	sbi = UDF_SB(sb);
+	if (sbi->s_vat_inode)
+		iput(sbi->s_vat_inode);
+	if (sbi->s_partitions) {
+		struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition];
+		if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
+			iput(map->s_uspace.s_table);
+		if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
+			iput(map->s_fspace.s_table);
+		if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
+			UDF_SB_FREE_BITMAP(sb, sbi->s_partition, s_uspace);
+		if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
+			UDF_SB_FREE_BITMAP(sb, sbi->s_partition, s_fspace);
+		if (map->s_partition_type == UDF_SPARABLE_MAP15)
 			for (i = 0; i < 4; i++)
-				brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]);
-		}
+				brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
 	}
 #ifdef CONFIG_UDF_NLS
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
-		unload_nls(UDF_SB(sb)->s_nls_map);
+		unload_nls(sbi->s_nls_map);
 #endif
 	if (!(sb->s_flags & MS_RDONLY))
 		udf_close_lvid(sb);
-	brelse(UDF_SB_LVIDBH(sb));
-	UDF_SB_FREE(sb);
+	brelse(sbi->s_lvid_bh);
+	kfree(sbi->s_partmaps);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
 }
@@ -1728,15 +1778,22 @@ static void udf_put_super(struct super_block *sb)
 static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
+	struct udf_sb_info *sbi = UDF_SB(sb);
+	struct logicalVolIntegrityDescImpUse *lvidiu;
+
+	if (sbi->s_lvid_bh != NULL)
+		lvidiu = udf_sb_lvidiu(sbi);
+	else
+		lvidiu = NULL;
 
 	buf->f_type = UDF_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = UDF_SB_PARTLEN(sb, UDF_SB_PARTITION(sb));
+	buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len;
 	buf->f_bfree = udf_count_free(sb);
 	buf->f_bavail = buf->f_bfree;
-	buf->f_files = (UDF_SB_LVIDBH(sb) ?
-			(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) +
-			 le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs)) : 0) + buf->f_bfree;
+	buf->f_files = (lvidiu != NULL ? (le32_to_cpu(lvidiu->numFiles) +
+					  le32_to_cpu(lvidiu->numDirs)) : 0)
+			+ buf->f_bfree;
 	buf->f_ffree = buf->f_bfree;
 	/* __kernel_fsid_t f_fsid */
 	buf->f_namelen = UDF_NAME_LEN - 2;
@@ -1764,7 +1821,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, struct udf_bit
 	lock_kernel();
 
 	loc.logicalBlockNum = bitmap->s_extPosition;
-	loc.partitionReferenceNum = UDF_SB_PARTITION(sb);
+	loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
 	bh = udf_read_ptagged(sb, loc, 0, &ident);
 
 	if (!bh) {
@@ -1836,10 +1893,14 @@ static unsigned int udf_count_free_table(struct super_block *sb, struct inode *t
 static unsigned int udf_count_free(struct super_block *sb)
 {
 	unsigned int accum = 0;
+	struct udf_sb_info *sbi;
+	struct udf_part_map *map;
 
-	if (UDF_SB_LVIDBH(sb)) {
-		if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb)) {
-			accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]);
+	sbi = UDF_SB(sb);
+	if (sbi->s_lvid_bh) {
+		struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
+		if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) {
+			accum = le32_to_cpu(lvid->freeSpaceTable[sbi->s_partition]);
 			if (accum == 0xFFFFFFFF)
 				accum = 0;
 		}
@@ -1848,24 +1909,25 @@ static unsigned int udf_count_free(struct super_block *sb)
 	if (accum)
 		return accum;
 
-	if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) {
+	map = &sbi->s_partmaps[sbi->s_partition];
+	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
 		accum += udf_count_free_bitmap(sb,
-					       UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_bitmap);
+					       map->s_uspace.s_bitmap);
 	}
-	if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) {
+	if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) {
 		accum += udf_count_free_bitmap(sb,
-					       UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_bitmap);
+					       map->s_fspace.s_bitmap);
 	}
 	if (accum)
 		return accum;
 
-	if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE) {
+	if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) {
 		accum += udf_count_free_table(sb,
-					      UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table);
+					      map->s_uspace.s_table);
 	}
-	if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE) {
+	if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) {
 		accum += udf_count_free_table(sb,
-					      UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table);
+					      map->s_fspace.s_table);
 	}
 
 	return accum;
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 7fc3912885a5..6931f6bfa1ae 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -163,7 +163,7 @@ void udf_discard_prealloc(struct inode *inode)
 				cpu_to_le32(epos.offset -
 					    sizeof(struct allocExtDesc));
 			if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
-			    UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
+			    UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
 				udf_update_tag(epos.bh->b_data, epos.offset);
 			else
 				udf_update_tag(epos.bh->b_data,
@@ -184,6 +184,7 @@ void udf_truncate_extents(struct inode *inode)
 	uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc;
 	int8_t etype;
 	struct super_block *sb = inode->i_sb;
+	struct udf_sb_info *sbi = UDF_SB(sb);
 	sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset;
 	loff_t byte_offset;
 	int adsize;
@@ -232,7 +233,7 @@ void udf_truncate_extents(struct inode *inode)
 						aed->lengthAllocDescs =
 						    cpu_to_le32(lenalloc);
 						if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) ||
-						    UDF_SB_UDFREV(sb) >= 0x0201)
+						    sbi->s_udfrev >= 0x0201)
 							udf_update_tag(epos.bh->b_data,
 								       lenalloc +
 								       sizeof(struct allocExtDesc));
@@ -271,7 +272,7 @@ void udf_truncate_extents(struct inode *inode)
 				    (struct allocExtDesc *)(epos.bh->b_data);
 				aed->lengthAllocDescs = cpu_to_le32(lenalloc);
 				if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) ||
-				    UDF_SB_UDFREV(sb) >= 0x0201)
+				    sbi->s_udfrev >= 0x0201)
 					udf_update_tag(epos.bh->b_data,
 						       lenalloc + sizeof(struct allocExtDesc));
 				else
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 3c2982017c6d..92e6d75b0163 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -41,40 +41,36 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
-#define UDF_SB_FREE(X)\
-{\
-	if (UDF_SB(X)) {\
-		kfree(UDF_SB_PARTMAPS(X));\
-		UDF_SB_PARTMAPS(X) = NULL;\
-	}\
-}
+struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi);
 
 #define UDF_SB_ALLOC_PARTMAPS(X,Y)\
 {\
-	UDF_SB_PARTMAPS(X) = kmalloc(sizeof(struct udf_part_map) * Y, GFP_KERNEL);\
-	if (UDF_SB_PARTMAPS(X) != NULL) {\
-		UDF_SB_NUMPARTS(X) = Y;\
-		memset(UDF_SB_PARTMAPS(X), 0x00, sizeof(struct udf_part_map) * Y);\
+	struct udf_sb_info *sbi = UDF_SB(X);\
+	sbi->s_partmaps = kmalloc(sizeof(struct udf_part_map) * Y, GFP_KERNEL);\
+	if (sbi->s_partmaps != NULL) {\
+		sbi->s_partitions = Y;\
+		memset(sbi->s_partmaps, 0x00, sizeof(struct udf_part_map) * Y);\
 	} else {\
-		UDF_SB_NUMPARTS(X) = 0;\
+		sbi->s_partitions = 0;\
 		udf_error(X, __FUNCTION__, "Unable to allocate space for %d partition maps", Y);\
 	}\
 }
 
 #define UDF_SB_ALLOC_BITMAP(X,Y,Z)\
 {\
-	int nr_groups = ((UDF_SB_PARTLEN((X),(Y)) + (sizeof(struct spaceBitmapDesc) << 3) +\
+	struct udf_sb_info *sbi = UDF_SB(X);\
+	int nr_groups = ((sbi->s_partmaps[(Y)].s_partition_len + (sizeof(struct spaceBitmapDesc) << 3) +\
 		((X)->s_blocksize * 8) - 1) / ((X)->s_blocksize * 8));\
 	int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) * nr_groups);\
 	if (size <= PAGE_SIZE)\
-		UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap = kmalloc(size, GFP_KERNEL);\
+		sbi->s_partmaps[(Y)].Z.s_bitmap = kmalloc(size, GFP_KERNEL);\
 	else\
-		UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap = vmalloc(size);\
-	if (UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap != NULL) {\
-		memset(UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap, 0x00, size);\
-		UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_block_bitmap =\
-			(struct buffer_head **)(UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap + 1);\
-		UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_nr_groups = nr_groups;\
+		sbi->s_partmaps[(Y)].Z.s_bitmap = vmalloc(size);\
+	if (sbi->s_partmaps[(Y)].Z.s_bitmap != NULL) {\
+		memset(sbi->s_partmaps[(Y)].Z.s_bitmap, 0x00, size);\
+		sbi->s_partmaps[(Y)].Z.s_bitmap->s_block_bitmap =\
+			(struct buffer_head **)(sbi->s_partmaps[(Y)].Z.s_bitmap + 1);\
+		sbi->s_partmaps[(Y)].Z.s_bitmap->s_nr_groups = nr_groups;\
 	} else {\
 		udf_error(X, __FUNCTION__, "Unable to allocate space for bitmap and %d buffer_head pointers", nr_groups);\
 	}\
@@ -90,47 +86,16 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
 			brelse(UDF_SB_BITMAP(X,Y,Z,i));\
 	}\
 	if (size <= PAGE_SIZE)\
-		kfree(UDF_SB_PARTMAPS(X)[Y].Z.s_bitmap);\
+		kfree(UDF_SB(X)->s_partmaps[Y].Z.s_bitmap);\
 	else\
-		vfree(UDF_SB_PARTMAPS(X)[Y].Z.s_bitmap);\
+		vfree(UDF_SB(X)->s_partmaps[Y].Z.s_bitmap);\
 }
 
 #define UDF_QUERY_FLAG(X,Y)			( UDF_SB(X)->s_flags & ( 1 << (Y) ) )
 #define UDF_SET_FLAG(X,Y)			( UDF_SB(X)->s_flags |= ( 1 << (Y) ) )
 #define UDF_CLEAR_FLAG(X,Y)			( UDF_SB(X)->s_flags &= ~( 1 << (Y) ) )
 
-#define UDF_UPDATE_UDFREV(X,Y)			( ((Y) > UDF_SB_UDFREV(X)) ? UDF_SB_UDFREV(X) = (Y) : UDF_SB_UDFREV(X) )
-
-#define UDF_SB_PARTMAPS(X)			( UDF_SB(X)->s_partmaps )
-#define UDF_SB_PARTTYPE(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_partition_type )
-#define UDF_SB_PARTROOT(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_partition_root )
-#define UDF_SB_PARTLEN(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_partition_len )
-#define UDF_SB_PARTVSN(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_volumeseqnum )
-#define UDF_SB_PARTNUM(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_partition_num )
-#define UDF_SB_TYPESPAR(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_sparing )
-#define UDF_SB_TYPEVIRT(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_virtual )
-#define UDF_SB_PARTFUNC(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_partition_func )
-#define UDF_SB_PARTFLAGS(X,Y)			( UDF_SB_PARTMAPS(X)[(Y)].s_partition_flags )
-#define UDF_SB_BITMAP(X,Y,Z,I)			( UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_block_bitmap[I] )
-#define UDF_SB_BITMAP_NR_GROUPS(X,Y,Z)		( UDF_SB_PARTMAPS(X)[(Y)].Z.s_bitmap->s_nr_groups )
-
-#define UDF_SB_VOLIDENT(X)			( UDF_SB(X)->s_volident )
-#define UDF_SB_NUMPARTS(X)			( UDF_SB(X)->s_partitions )
-#define UDF_SB_PARTITION(X)			( UDF_SB(X)->s_partition )
-#define UDF_SB_SESSION(X)			( UDF_SB(X)->s_session )
-#define UDF_SB_ANCHOR(X)			( UDF_SB(X)->s_anchor )
-#define UDF_SB_LASTBLOCK(X)			( UDF_SB(X)->s_lastblock )
-#define UDF_SB_LVIDBH(X)			( UDF_SB(X)->s_lvidbh )
-#define UDF_SB_LVID(X)				( (struct logicalVolIntegrityDesc *)UDF_SB_LVIDBH(X)->b_data )
-#define UDF_SB_LVIDIU(X)			( (struct logicalVolIntegrityDescImpUse *)&(UDF_SB_LVID(X)->impUse[le32_to_cpu(UDF_SB_LVID(X)->numOfPartitions) * 2 * sizeof(uint32_t)/sizeof(uint8_t)]) )
-
-#define UDF_SB_UMASK(X)				( UDF_SB(X)->s_umask )
-#define UDF_SB_GID(X)				( UDF_SB(X)->s_gid )
-#define UDF_SB_UID(X)				( UDF_SB(X)->s_uid )
-#define UDF_SB_RECORDTIME(X)			( UDF_SB(X)->s_recordtime )
-#define UDF_SB_SERIALNUM(X)			( UDF_SB(X)->s_serialnum )
-#define UDF_SB_UDFREV(X)			( UDF_SB(X)->s_udfrev )
-#define UDF_SB_FLAGS(X)				( UDF_SB(X)->s_flags )
-#define UDF_SB_VAT(X)				( UDF_SB(X)->s_vat )
+#define UDF_SB_BITMAP(X,Y,Z,I)			( UDF_SB(X)->s_partmaps[(Y)].Z.s_bitmap->s_block_bitmap[I] )
+#define UDF_SB_BITMAP_NR_GROUPS(X,Y,Z)		( UDF_SB(X)->s_partmaps[(Y)].Z.s_bitmap->s_nr_groups )
 
 #endif /* __LINUX_UDF_SB_H */
diff --git a/include/linux/udf_fs_sb.h b/include/linux/udf_fs_sb.h
index 80ae9ef940dc..9bc47352b6b4 100644
--- a/include/linux/udf_fs_sb.h
+++ b/include/linux/udf_fs_sb.h
@@ -75,7 +75,7 @@ struct udf_part_map
 struct udf_sb_info
 {
 	struct udf_part_map	*s_partmaps;
-	__u8			s_volident[32];
+	__u8			s_volume_ident[32];
 
 	/* Overall info */
 	__u16			s_partitions;
@@ -84,9 +84,9 @@ struct udf_sb_info
 	/* Sector headers */
 	__s32			s_session;
 	__u32			s_anchor[4];
-	__u32			s_lastblock;
+	__u32			s_last_block;
 
-	struct buffer_head	*s_lvidbh;
+	struct buffer_head	*s_lvid_bh;
 
 	/* Default permissions */
 	mode_t			s_umask;
@@ -94,10 +94,10 @@ struct udf_sb_info
 	uid_t			s_uid;
 
 	/* Root Info */
-	struct timespec		s_recordtime;
+	struct timespec		s_record_time;
 
 	/* Fileset Info */
-	__u16			s_serialnum;
+	__u16			s_serial_number;
 
 	/* highest UDF revision we have recorded to this media */
 	__u16			s_udfrev;
@@ -109,7 +109,7 @@ struct udf_sb_info
 	struct nls_table	*s_nls_map;
 
 	/* VAT inode */
-	struct inode		*s_vat;
+	struct inode		*s_vat_inode;
 
 	struct mutex		s_alloc_mutex;
 };
-- 
cgit 


From 756fa92f4d725698ffa4ac1faeb8f4e8cdb6cd95 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Fri, 8 Feb 2008 04:20:46 -0800
Subject: udf: fix udf_debug macro

udf_debug should be enclosed with do { } while (0)
to be safely used in code like below:
if (something)
	udf_debug();
else
	anything;
(Otherwise compiler will not compile it with:
"error: expected expression before 'else'")

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/udf_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udf_fs.h b/include/linux/udf_fs.h
index 36c684e1b110..c954527a41be 100644
--- a/include/linux/udf_fs.h
+++ b/include/linux/udf_fs.h
@@ -39,11 +39,11 @@
 
 #ifdef UDFFS_DEBUG
 #define udf_debug(f, a...) \
-	{ \
+	do { \
 		printk (KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \
 			__FILE__, __LINE__, __FUNCTION__); \
 		printk (f, ##a); \
-	}
+	} while (0)
 #else
 #define udf_debug(f, a...) /**/
 #endif
-- 
cgit 


From a9ca663578321695658675103c35452d8ce91d85 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 8 Feb 2008 04:20:47 -0800
Subject: kill UDFFS_{DATE,VERSION}

Printing date and version of a driver makes sense if there's a maintainer
who's maintaining and using these, but printing ancient version information
only confuses users.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/udf/super.c         | 3 +--
 include/linux/udf_fs.h | 3 ---
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 2048351c56c0..3afe7647f94a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1792,9 +1792,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	if (!silent) {
 		kernel_timestamp ts;
 		udf_time_to_stamp(&ts, sbi->s_record_time);
-		udf_info("UDF %s (%s) Mounting volume '%s', "
+		udf_info("UDF: Mounting volume '%s', "
 			 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
-			 UDFFS_VERSION, UDFFS_DATE,
 			 sbi->s_volume_ident, ts.year, ts.month, ts.day,
 			 ts.hour, ts.minute, ts.typeAndTimezone);
 	}
diff --git a/include/linux/udf_fs.h b/include/linux/udf_fs.h
index c954527a41be..aa88654eb76b 100644
--- a/include/linux/udf_fs.h
+++ b/include/linux/udf_fs.h
@@ -32,9 +32,6 @@
 #define UDF_PREALLOCATE
 #define UDF_DEFAULT_PREALLOC_BLOCKS	8
 
-#define UDFFS_DATE			"2004/29/09"
-#define UDFFS_VERSION			"0.9.8.1"
-
 #undef UDFFS_DEBUG
 
 #ifdef UDFFS_DEBUG
-- 
cgit 


From b4bd7d59451960d4e1d994c01581b31b08fe3720 Mon Sep 17 00:00:00 2001
From: Wim Van Sebroeck <wim@iguana.be>
Date: Fri, 8 Feb 2008 04:20:58 -0800
Subject: SMBIOS/DMI: add type 41 = Onboard Devices Extended Information

From version 2.6 of the SMBIOS standard, type 10 (On Board Devices
Information) becomes obsolete.  The reason for this is that no further
fields can be added to this structure without adversely affecting existing
software's ability to properly parse the data.

Therefore type 41 (Onboard Devices Extended Information) was added.
The structure is as follows:

struct smbios_type_41 {
	u8 type;
	u8 length;
	u16 handle;
	u8 reference_designation_string;
	u8 device_type;		/* same device type as in type 10 */
	u8 device_type_instance;
	u16 segment_group_number;
	u8 bus_number;
	u8 device_function_number;
};

For more info: http://www.dmtf.org/standards/smbios

Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Tejun Heo <htejun@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/dmi_scan.c | 25 +++++++++++++++++++++++++
 include/linux/dmi.h         |  5 ++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 1412d7bcdbd1..653265a40b7f 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -250,6 +250,28 @@ static void __init dmi_save_ipmi_device(const struct dmi_header *dm)
 	list_add(&dev->list, &dmi_devices);
 }
 
+static void __init dmi_save_extended_devices(const struct dmi_header *dm)
+{
+	const u8 *d = (u8*) dm + 5;
+	struct dmi_device *dev;
+
+	/* Skip disabled device */
+	if ((*d & 0x80) == 0)
+		return;
+
+	dev = dmi_alloc(sizeof(*dev));
+	if (!dev) {
+		printk(KERN_ERR "dmi_save_extended_devices: out of memory.\n");
+		return;
+	}
+
+	dev->type = *d-- & 0x7f;
+	dev->name = dmi_string(dm, *d);
+	dev->device_data = NULL;
+
+	list_add(&dev->list, &dmi_devices);
+}
+
 /*
  *	Process a DMI table entry. Right now all we care about are the BIOS
  *	and machine entries. For 2.5 we should pull the smbus controller info
@@ -292,6 +314,9 @@ static void __init dmi_decode(const struct dmi_header *dm)
 		break;
 	case 38:	/* IPMI Device Information */
 		dmi_save_ipmi_device(dm);
+		break;
+	case 41:	/* Onboard Devices Extended Information */
+		dmi_save_extended_devices(dm);
 	}
 }
 
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index bbc9992ec374..325acdf5c462 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -35,8 +35,11 @@ enum dmi_device_type {
 	DMI_DEV_TYPE_ETHERNET,
 	DMI_DEV_TYPE_TOKENRING,
 	DMI_DEV_TYPE_SOUND,
+	DMI_DEV_TYPE_PATA,
+	DMI_DEV_TYPE_SATA,
+	DMI_DEV_TYPE_SAS,
 	DMI_DEV_TYPE_IPMI = -1,
-	DMI_DEV_TYPE_OEM_STRING = -2
+	DMI_DEV_TYPE_OEM_STRING = -2,
 };
 
 struct dmi_header {
-- 
cgit 


From 6d141c3ff6d74cc30cdbf26155842756ac16cf7f Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 8 Feb 2008 04:21:09 -0800
Subject: workqueue: make delayed_work_timer_fn() static

delayed_work_timer_fn() is a timer function, make it static.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/timer.h | 2 --
 kernel/workqueue.c    | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index ffe438a901b5..979fefdeb862 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -124,8 +124,6 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
 }
 #endif
 
-extern void delayed_work_timer_fn(unsigned long __data);
-
 /**
  * add_timer - start a timer
  * @timer: the timer to be added
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3f168e00ce5b..ff06611655af 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -175,7 +175,7 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
-void delayed_work_timer_fn(unsigned long __data)
+static void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
 	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
-- 
cgit 


From 9a1e8eb1f0b76b5e72a2343ad881c81b08dd6410 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Fri, 8 Feb 2008 04:21:21 -0800
Subject: Basic PWM driver for AVR32 and AT91

PWM device setup, and a simple PWM driver exposing a programming interface
giving access to each channel's full capabilities.  Note that this doesn't
support starting several channels in synch.

[hskinnemoen@atmel.com: allocate platform device dynamically]
[hskinnemoen@atmel.com: Kconfig fix]
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Andrew Victor <linux@maxim.org.za>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/avr32/mach-at32ap/at32ap700x.c   |  54 +++++
 drivers/misc/Kconfig                  |   9 +
 drivers/misc/Makefile                 |   1 +
 drivers/misc/atmel_pwm.c              | 409 ++++++++++++++++++++++++++++++++++
 include/asm-avr32/arch-at32ap/board.h |   3 +
 include/linux/atmel_pwm.h             |  70 ++++++
 6 files changed, 546 insertions(+)
 create mode 100644 drivers/misc/atmel_pwm.c
 create mode 100644 include/linux/atmel_pwm.h

(limited to 'include/linux')

diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 14e61f05e1f6..7678fee9a885 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1185,6 +1185,59 @@ err_dup_modedb:
 }
 #endif
 
+/* --------------------------------------------------------------------
+ *  PWM
+ * -------------------------------------------------------------------- */
+static struct resource atmel_pwm0_resource[] __initdata = {
+	PBMEM(0xfff01400),
+	IRQ(24),
+};
+static struct clk atmel_pwm0_mck = {
+	.name		= "mck",
+	.parent		= &pbb_clk,
+	.mode		= pbb_clk_mode,
+	.get_rate	= pbb_clk_get_rate,
+	.index		= 5,
+};
+
+struct platform_device *__init at32_add_device_pwm(u32 mask)
+{
+	struct platform_device *pdev;
+
+	if (!mask)
+		return NULL;
+
+	pdev = platform_device_alloc("atmel_pwm", 0);
+	if (!pdev)
+		return NULL;
+
+	if (platform_device_add_resources(pdev, atmel_pwm0_resource,
+				ARRAY_SIZE(atmel_pwm0_resource)))
+		goto out_free_pdev;
+
+	if (platform_device_add_data(pdev, &mask, sizeof(mask)))
+		goto out_free_pdev;
+
+	if (mask & (1 << 0))
+		select_peripheral(PA(28), PERIPH_A, 0);
+	if (mask & (1 << 1))
+		select_peripheral(PA(29), PERIPH_A, 0);
+	if (mask & (1 << 2))
+		select_peripheral(PA(21), PERIPH_B, 0);
+	if (mask & (1 << 3))
+		select_peripheral(PA(22), PERIPH_B, 0);
+
+	atmel_pwm0_mck.dev = &pdev->dev;
+
+	platform_device_add(pdev);
+
+	return pdev;
+
+out_free_pdev:
+	platform_device_put(pdev);
+	return NULL;
+}
+
 /* --------------------------------------------------------------------
  *  SSC
  * -------------------------------------------------------------------- */
@@ -1646,6 +1699,7 @@ struct clk *at32_clock_list[] = {
 	&atmel_usart1_usart,
 	&atmel_usart2_usart,
 	&atmel_usart3_usart,
+	&atmel_pwm0_mck,
 #if defined(CONFIG_CPU_AT32AP7000)
 	&macb0_hclk,
 	&macb0_pclk,
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 7b5220ca7d7f..1941587a7aa2 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -13,6 +13,15 @@ menuconfig MISC_DEVICES
 
 if MISC_DEVICES
 
+config ATMEL_PWM
+	tristate "Atmel AT32/AT91 PWM support"
+	depends on AVR32 || ARCH_AT91
+	help
+	  This option enables device driver support for the PWM channels
+	  on certain Atmel prcoessors.  Pulse Width Modulation is used for
+	  purposes including software controlled power-efficent backlights
+	  on LCD displays, motor control, and waveform generation.
+
 config IBM_ASM
 	tristate "Device driver for IBM RSA service processor"
 	depends on X86 && PCI && INPUT && EXPERIMENTAL
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 7f13549cc87e..3b12f5da8562 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_HDPU_FEATURES)	+= hdpuftrs/
 obj-$(CONFIG_MSI_LAPTOP)     += msi-laptop.o
 obj-$(CONFIG_ACER_WMI)     += acer-wmi.o
 obj-$(CONFIG_ASUS_LAPTOP)     += asus-laptop.o
+obj-$(CONFIG_ATMEL_PWM)		+= atmel_pwm.o
 obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
 obj-$(CONFIG_TC1100_WMI)	+= tc1100-wmi.o
 obj-$(CONFIG_LKDTM)		+= lkdtm.o
diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c
new file mode 100644
index 000000000000..f8d3b9a76cbd
--- /dev/null
+++ b/drivers/misc/atmel_pwm.c
@@ -0,0 +1,409 @@
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/atmel_pwm.h>
+
+
+/*
+ * This is a simple driver for the PWM controller found in various newer
+ * Atmel SOCs, including the AVR32 series and the AT91sam9263.
+ *
+ * Chips with current Linux ports have only 4 PWM channels, out of max 32.
+ * AT32UC3A and AT32UC3B chips have 7 channels (but currently no Linux).
+ * Docs are inconsistent about the width of the channel counter registers;
+ * it's at least 16 bits, but several places say 20 bits.
+ */
+#define	PWM_NCHAN	4		/* max 32 */
+
+struct pwm {
+	spinlock_t		lock;
+	struct platform_device	*pdev;
+	u32			mask;
+	int			irq;
+	void __iomem		*base;
+	struct clk		*clk;
+	struct pwm_channel	*channel[PWM_NCHAN];
+	void			(*handler[PWM_NCHAN])(struct pwm_channel *);
+};
+
+
+/* global PWM controller registers */
+#define PWM_MR		0x00
+#define PWM_ENA		0x04
+#define PWM_DIS		0x08
+#define PWM_SR		0x0c
+#define PWM_IER		0x10
+#define PWM_IDR		0x14
+#define PWM_IMR		0x18
+#define PWM_ISR		0x1c
+
+static inline void pwm_writel(const struct pwm *p, unsigned offset, u32 val)
+{
+	__raw_writel(val, p->base + offset);
+}
+
+static inline u32 pwm_readl(const struct pwm *p, unsigned offset)
+{
+	return __raw_readl(p->base + offset);
+}
+
+static inline void __iomem *pwmc_regs(const struct pwm *p, int index)
+{
+	return p->base + 0x200 + index * 0x20;
+}
+
+static struct pwm *pwm;
+
+static void pwm_dumpregs(struct pwm_channel *ch, char *tag)
+{
+	struct device	*dev = &pwm->pdev->dev;
+
+	dev_dbg(dev, "%s: mr %08x, sr %08x, imr %08x\n",
+		tag,
+		pwm_readl(pwm, PWM_MR),
+		pwm_readl(pwm, PWM_SR),
+		pwm_readl(pwm, PWM_IMR));
+	dev_dbg(dev,
+		"pwm ch%d - mr %08x, dty %u, prd %u, cnt %u\n",
+		ch->index,
+		pwm_channel_readl(ch, PWM_CMR),
+		pwm_channel_readl(ch, PWM_CDTY),
+		pwm_channel_readl(ch, PWM_CPRD),
+		pwm_channel_readl(ch, PWM_CCNT));
+}
+
+
+/**
+ * pwm_channel_alloc - allocate an unused PWM channel
+ * @index: identifies the channel
+ * @ch: structure to be initialized
+ *
+ * Drivers allocate PWM channels according to the board's wiring, and
+ * matching board-specific setup code.  Returns zero or negative errno.
+ */
+int pwm_channel_alloc(int index, struct pwm_channel *ch)
+{
+	unsigned long	flags;
+	int		status = 0;
+
+	/* insist on PWM init, with this signal pinned out */
+	if (!pwm || !(pwm->mask & 1 << index))
+		return -ENODEV;
+
+	if (index < 0 || index >= PWM_NCHAN || !ch)
+		return -EINVAL;
+	memset(ch, 0, sizeof *ch);
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	if (pwm->channel[index])
+		status = -EBUSY;
+	else {
+		clk_enable(pwm->clk);
+
+		ch->regs = pwmc_regs(pwm, index);
+		ch->index = index;
+
+		/* REVISIT: ap7000 seems to go 2x as fast as we expect!! */
+		ch->mck = clk_get_rate(pwm->clk);
+
+		pwm->channel[index] = ch;
+		pwm->handler[index] = NULL;
+
+		/* channel and irq are always disabled when we return */
+		pwm_writel(pwm, PWM_DIS, 1 << index);
+		pwm_writel(pwm, PWM_IDR, 1 << index);
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+	return status;
+}
+EXPORT_SYMBOL(pwm_channel_alloc);
+
+static int pwmcheck(struct pwm_channel *ch)
+{
+	int		index;
+
+	if (!pwm)
+		return -ENODEV;
+	if (!ch)
+		return -EINVAL;
+	index = ch->index;
+	if (index < 0 || index >= PWM_NCHAN || pwm->channel[index] != ch)
+		return -EINVAL;
+
+	return index;
+}
+
+/**
+ * pwm_channel_free - release a previously allocated channel
+ * @ch: the channel being released
+ *
+ * The channel is completely shut down (counter and IRQ disabled),
+ * and made available for re-use.  Returns zero, or negative errno.
+ */
+int pwm_channel_free(struct pwm_channel *ch)
+{
+	unsigned long	flags;
+	int		t;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	t = pwmcheck(ch);
+	if (t >= 0) {
+		pwm->channel[t] = NULL;
+		pwm->handler[t] = NULL;
+
+		/* channel and irq are always disabled when we return */
+		pwm_writel(pwm, PWM_DIS, 1 << t);
+		pwm_writel(pwm, PWM_IDR, 1 << t);
+
+		clk_disable(pwm->clk);
+		t = 0;
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+	return t;
+}
+EXPORT_SYMBOL(pwm_channel_free);
+
+int __pwm_channel_onoff(struct pwm_channel *ch, int enabled)
+{
+	unsigned long	flags;
+	int		t;
+
+	/* OMITTED FUNCTIONALITY:  starting several channels in synch */
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	t = pwmcheck(ch);
+	if (t >= 0) {
+		pwm_writel(pwm, enabled ? PWM_ENA : PWM_DIS, 1 << t);
+		t = 0;
+		pwm_dumpregs(ch, enabled ? "enable" : "disable");
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+
+	return t;
+}
+EXPORT_SYMBOL(__pwm_channel_onoff);
+
+/**
+ * pwm_clk_alloc - allocate and configure CLKA or CLKB
+ * @prescale: from 0..10, the power of two used to divide MCK
+ * @div: from 1..255, the linear divisor to use
+ *
+ * Returns PWM_CPR_CLKA, PWM_CPR_CLKB, or negative errno.  The allocated
+ * clock will run with a period of (2^prescale * div) / MCK, or twice as
+ * long if center aligned PWM output is used.  The clock must later be
+ * deconfigured using pwm_clk_free().
+ */
+int pwm_clk_alloc(unsigned prescale, unsigned div)
+{
+	unsigned long	flags;
+	u32		mr;
+	u32		val = (prescale << 8) | div;
+	int		ret = -EBUSY;
+
+	if (prescale >= 10 || div == 0 || div > 255)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	mr = pwm_readl(pwm, PWM_MR);
+	if ((mr & 0xffff) == 0) {
+		mr |= val;
+		ret = PWM_CPR_CLKA;
+	}
+	if ((mr & (0xffff << 16)) == 0) {
+		mr |= val << 16;
+		ret = PWM_CPR_CLKB;
+	}
+	if (ret > 0)
+		pwm_writel(pwm, PWM_MR, mr);
+	spin_unlock_irqrestore(&pwm->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(pwm_clk_alloc);
+
+/**
+ * pwm_clk_free - deconfigure and release CLKA or CLKB
+ *
+ * Reverses the effect of pwm_clk_alloc().
+ */
+void pwm_clk_free(unsigned clk)
+{
+	unsigned long	flags;
+	u32		mr;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	mr = pwm_readl(pwm, PWM_MR);
+	if (clk == PWM_CPR_CLKA)
+		pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 0));
+	if (clk == PWM_CPR_CLKB)
+		pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 16));
+	spin_unlock_irqrestore(&pwm->lock, flags);
+}
+EXPORT_SYMBOL(pwm_clk_free);
+
+/**
+ * pwm_channel_handler - manage channel's IRQ handler
+ * @ch: the channel
+ * @handler: the handler to use, possibly NULL
+ *
+ * If the handler is non-null, the handler will be called after every
+ * period of this PWM channel.  If the handler is null, this channel
+ * won't generate an IRQ.
+ */
+int pwm_channel_handler(struct pwm_channel *ch,
+		void (*handler)(struct pwm_channel *ch))
+{
+	unsigned long	flags;
+	int		t;
+
+	spin_lock_irqsave(&pwm->lock, flags);
+	t = pwmcheck(ch);
+	if (t >= 0) {
+		pwm->handler[t] = handler;
+		pwm_writel(pwm, handler ? PWM_IER : PWM_IDR, 1 << t);
+		t = 0;
+	}
+	spin_unlock_irqrestore(&pwm->lock, flags);
+
+	return t;
+}
+EXPORT_SYMBOL(pwm_channel_handler);
+
+static irqreturn_t pwm_irq(int id, void *_pwm)
+{
+	struct pwm	*p = _pwm;
+	irqreturn_t	handled = IRQ_NONE;
+	u32		irqstat;
+	int		index;
+
+	spin_lock(&p->lock);
+
+	/* ack irqs, then handle them */
+	irqstat = pwm_readl(pwm, PWM_ISR);
+
+	while (irqstat) {
+		struct pwm_channel *ch;
+		void (*handler)(struct pwm_channel *ch);
+
+		index = ffs(irqstat) - 1;
+		irqstat &= ~(1 << index);
+		ch = pwm->channel[index];
+		handler = pwm->handler[index];
+		if (handler && ch) {
+			spin_unlock(&p->lock);
+			handler(ch);
+			spin_lock(&p->lock);
+			handled = IRQ_HANDLED;
+		}
+	}
+
+	spin_unlock(&p->lock);
+	return handled;
+}
+
+static int __init pwm_probe(struct platform_device *pdev)
+{
+	struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int irq = platform_get_irq(pdev, 0);
+	u32 *mp = pdev->dev.platform_data;
+	struct pwm *p;
+	int status = -EIO;
+
+	if (pwm)
+		return -EBUSY;
+	if (!r || irq < 0 || !mp || !*mp)
+		return -ENODEV;
+	if (*mp & ~((1<<PWM_NCHAN)-1)) {
+		dev_warn(&pdev->dev, "mask 0x%x ... more than %d channels\n",
+			*mp, PWM_NCHAN);
+		return -EINVAL;
+	}
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	spin_lock_init(&p->lock);
+	p->pdev = pdev;
+	p->mask = *mp;
+	p->irq = irq;
+	p->base = ioremap(r->start, r->end - r->start + 1);
+	if (!p->base)
+		goto fail;
+	p->clk = clk_get(&pdev->dev, "mck");
+	if (IS_ERR(p->clk)) {
+		status = PTR_ERR(p->clk);
+		p->clk = NULL;
+		goto fail;
+	}
+
+	status = request_irq(irq, pwm_irq, 0, pdev->name, p);
+	if (status < 0)
+		goto fail;
+
+	pwm = p;
+	platform_set_drvdata(pdev, p);
+
+	return 0;
+
+fail:
+	if (p->clk)
+		clk_put(p->clk);
+	if (p->base)
+		iounmap(p->base);
+
+	kfree(p);
+	return status;
+}
+
+static int __exit pwm_remove(struct platform_device *pdev)
+{
+	struct pwm *p = platform_get_drvdata(pdev);
+
+	if (p != pwm)
+		return -EINVAL;
+
+	clk_enable(pwm->clk);
+	pwm_writel(pwm, PWM_DIS, (1 << PWM_NCHAN) - 1);
+	pwm_writel(pwm, PWM_IDR, (1 << PWM_NCHAN) - 1);
+	clk_disable(pwm->clk);
+
+	pwm = NULL;
+
+	free_irq(p->irq, p);
+	clk_put(p->clk);
+	iounmap(p->base);
+	kfree(p);
+
+	return 0;
+}
+
+static struct platform_driver atmel_pwm_driver = {
+	.driver = {
+		.name = "atmel_pwm",
+		.owner = THIS_MODULE,
+	},
+	.remove = __exit_p(pwm_remove),
+
+	/* NOTE: PWM can keep running in AVR32 "idle" and "frozen" states;
+	 * and all AT91sam9263 states, albeit at reduced clock rate if
+	 * MCK becomes the slow clock (i.e. what Linux labels STR).
+	 */
+};
+
+static int __init pwm_init(void)
+{
+	return platform_driver_probe(&atmel_pwm_driver, pwm_probe);
+}
+module_init(pwm_init);
+
+static void __exit pwm_exit(void)
+{
+	platform_driver_unregister(&atmel_pwm_driver);
+}
+module_exit(pwm_exit);
+
+MODULE_DESCRIPTION("Driver for AT32/AT91 PWM module");
+MODULE_LICENSE("GPL");
diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h
index d6993a6b6473..7597b0bd2f01 100644
--- a/include/asm-avr32/arch-at32ap/board.h
+++ b/include/asm-avr32/arch-at32ap/board.h
@@ -51,6 +51,9 @@ struct platform_device *
 at32_add_device_ide(unsigned int id, unsigned int extint,
 		    struct ide_platform_data *data);
 
+/* mask says which PWM channels to mux */
+struct platform_device *at32_add_device_pwm(u32 mask);
+
 /* depending on what's hooked up, not all SSC pins will be used */
 #define	ATMEL_SSC_TK		0x01
 #define	ATMEL_SSC_TF		0x02
diff --git a/include/linux/atmel_pwm.h b/include/linux/atmel_pwm.h
new file mode 100644
index 000000000000..ea04abb3db8e
--- /dev/null
+++ b/include/linux/atmel_pwm.h
@@ -0,0 +1,70 @@
+#ifndef __LINUX_ATMEL_PWM_H
+#define __LINUX_ATMEL_PWM_H
+
+/**
+ * struct pwm_channel - driver handle to a PWM channel
+ * @regs: base of this channel's registers
+ * @index: number of this channel (0..31)
+ * @mck: base clock rate, which can be prescaled and maybe subdivided
+ *
+ * Drivers initialize a pwm_channel structure using pwm_channel_alloc().
+ * Then they configure its clock rate (derived from MCK), alignment,
+ * polarity, and duty cycle by writing directly to the channel registers,
+ * before enabling the channel by calling pwm_channel_enable().
+ *
+ * After emitting a PWM signal for the desired length of time, drivers
+ * may then pwm_channel_disable() or pwm_channel_free().  Both of these
+ * disable the channel, but when it's freed the IRQ is deconfigured and
+ * the channel must later be re-allocated and reconfigured.
+ *
+ * Note that if the period or duty cycle need to be changed while the
+ * PWM channel is operating, drivers must use the PWM_CUPD double buffer
+ * mechanism, either polling until they change or getting implicitly
+ * notified through a once-per-period interrupt handler.
+ */
+struct pwm_channel {
+	void __iomem	*regs;
+	unsigned	index;
+	unsigned long	mck;
+};
+
+extern int pwm_channel_alloc(int index, struct pwm_channel *ch);
+extern int pwm_channel_free(struct pwm_channel *ch);
+
+extern int pwm_clk_alloc(unsigned prescale, unsigned div);
+extern void pwm_clk_free(unsigned clk);
+
+extern int __pwm_channel_onoff(struct pwm_channel *ch, int enabled);
+
+#define pwm_channel_enable(ch)	__pwm_channel_onoff((ch), 1)
+#define pwm_channel_disable(ch)	__pwm_channel_onoff((ch), 0)
+
+/* periodic interrupts, mostly for CUPD changes to period or cycle */
+extern int pwm_channel_handler(struct pwm_channel *ch,
+		void (*handler)(struct pwm_channel *ch));
+
+/* per-channel registers (banked at pwm_channel->regs) */
+#define PWM_CMR		0x00		/* mode register */
+#define		PWM_CPR_CPD	(1 << 10)	/* set: CUPD modifies period */
+#define		PWM_CPR_CPOL	(1 << 9)	/* set: idle high */
+#define		PWM_CPR_CALG	(1 << 8)	/* set: center align */
+#define		PWM_CPR_CPRE	(0xf << 0)	/* mask: rate is mck/(2^pre) */
+#define		PWM_CPR_CLKA	(0xb << 0)	/* rate CLKA */
+#define		PWM_CPR_CLKB	(0xc << 0)	/* rate CLKB */
+#define PWM_CDTY	0x04		/* duty cycle (max of CPRD) */
+#define PWM_CPRD	0x08		/* period (count up from zero) */
+#define PWM_CCNT	0x0c		/* counter (20 bits?) */
+#define PWM_CUPD	0x10		/* update CPRD (or CDTY) next period */
+
+static inline void
+pwm_channel_writel(struct pwm_channel *pwmc, unsigned offset, u32 val)
+{
+	__raw_writel(val, pwmc->regs + offset);
+}
+
+static inline u32 pwm_channel_readl(struct pwm_channel *pwmc, unsigned offset)
+{
+	return __raw_readl(pwmc->regs + offset);
+}
+
+#endif /* __LINUX_ATMEL_PWM_H */
-- 
cgit 


From 7437a51b30743ff1488981a393fc9e67894bf757 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 8 Feb 2008 04:21:24 -0800
Subject: Remove __STRICT_ANSI__ from linux/types.h

All of the asm-*/types.h headers have been updated to no longer check
__STRICT_ANSI__ for the 64bit types, so this brings linux/types.h in line.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index b94c0e4efe24..9dc2346627b4 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -53,7 +53,7 @@ typedef __kernel_uid_t		uid_t;
 typedef __kernel_gid_t		gid_t;
 #endif /* __KERNEL__ */
 
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#if defined(__GNUC__)
 typedef __kernel_loff_t		loff_t;
 #endif
 
@@ -119,7 +119,7 @@ typedef		__u8		uint8_t;
 typedef		__u16		uint16_t;
 typedef		__u32		uint32_t;
 
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#if defined(__GNUC__)
 typedef		__u64		uint64_t;
 typedef		__u64		u_int64_t;
 typedef		__s64		int64_t;
@@ -181,7 +181,7 @@ typedef __u16 __bitwise __le16;
 typedef __u16 __bitwise __be16;
 typedef __u32 __bitwise __le32;
 typedef __u32 __bitwise __be32;
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#if defined(__GNUC__)
 typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;
 #endif
-- 
cgit 


From 36e789144267105e0b3f2b9bca7db3184fce50dc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 8 Feb 2008 04:21:24 -0800
Subject: kill do_generic_mapping_read

do_generic_mapping_read was used by gfs2 for internals reads, but this use
of the interface was rather suboptimal (as was the whole interface) and has
been replaced by an internal helper now.  This patch kills
do_generic_mapping_read and surrounding damage in preparation of additional
cleanups for the buffered read path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 15 ---------------
 mm/filemap.c       | 18 +++++-------------
 2 files changed, 5 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1137a8828089..3db22fc2249a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1807,9 +1807,6 @@ extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t, loff_t *, size_t, ssize_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern void do_generic_mapping_read(struct address_space *mapping,
-				    struct file_ra_state *, struct file *,
-				    loff_t *, read_descriptor_t *, read_actor_t);
 extern int generic_segment_checks(const struct iovec *iov,
 		unsigned long *nr_segs, size_t *count, int access_flags);
 
@@ -1847,18 +1844,6 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
 }
 #endif
 
-static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
-					read_descriptor_t * desc,
-					read_actor_t actor)
-{
-	do_generic_mapping_read(filp->f_mapping,
-				&filp->f_ra,
-				filp,
-				ppos,
-				desc,
-				actor);
-}
-
 #ifdef CONFIG_BLOCK
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
diff --git a/mm/filemap.c b/mm/filemap.c
index 4eb958c402fe..b7b1be6dbd83 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -875,9 +875,7 @@ static void shrink_readahead_size_eio(struct file *filp,
 }
 
 /**
- * do_generic_mapping_read - generic file read routine
- * @mapping:	address_space to be read
- * @ra:		file's readahead state
+ * do_generic_file_read - generic file read routine
  * @filp:	the file to read
  * @ppos:	current file position
  * @desc:	read_descriptor
@@ -888,18 +886,13 @@ static void shrink_readahead_size_eio(struct file *filp,
  *
  * This is really ugly. But the goto's actually try to clarify some
  * of the logic when it comes to error handling etc.
- *
- * Note the struct file* is only passed for the use of readpage.
- * It may be NULL.
  */
-void do_generic_mapping_read(struct address_space *mapping,
-			     struct file_ra_state *ra,
-			     struct file *filp,
-			     loff_t *ppos,
-			     read_descriptor_t *desc,
-			     read_actor_t actor)
+static void do_generic_file_read(struct file *filp, loff_t *ppos,
+		read_descriptor_t *desc, read_actor_t actor)
 {
+	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
+	struct file_ra_state *ra = &filp->f_ra;
 	pgoff_t index;
 	pgoff_t last_index;
 	pgoff_t prev_index;
@@ -1091,7 +1084,6 @@ out:
 	if (filp)
 		file_accessed(filp);
 }
-EXPORT_SYMBOL(do_generic_mapping_read);
 
 int file_read_actor(read_descriptor_t *desc, struct page *page,
 			unsigned long offset, unsigned long size)
-- 
cgit 


From 7ef3d2fd17c377ef64a2aa19677d17576606c3b4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 8 Feb 2008 04:21:25 -0800
Subject: printk_ratelimit() functions should use CONFIG_PRINTK

Makes an embedded image a bit smaller.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 18 +++++++++++++-----
 kernel/printk.c        |  2 ++
 kernel/sysctl.c        | 20 ++++++++++----------
 3 files changed, 25 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7d7519628dfa..8f28d35867f8 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -180,6 +180,13 @@ asmlinkage int printk(const char * fmt, ...)
 extern int log_buf_get_len(void);
 extern int log_buf_read(int idx);
 extern int log_buf_copy(char *dest, int idx, int len);
+
+extern int printk_ratelimit_jiffies;
+extern int printk_ratelimit_burst;
+extern int printk_ratelimit(void);
+extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
+extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+				   unsigned int interval_msec);
 #else
 static inline int vprintk(const char *s, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
@@ -190,6 +197,12 @@ static inline int __cold printk(const char *s, ...) { return 0; }
 static inline int log_buf_get_len(void) { return 0; }
 static inline int log_buf_read(int idx) { return 0; }
 static inline int log_buf_copy(char *dest, int idx, int len) { return 0; }
+static inline int printk_ratelimit(void) { return 0; }
+static inline int __printk_ratelimit(int ratelimit_jiffies, \
+				     int ratelimit_burst) { return 0; }
+static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
+					  unsigned int interval_msec)	\
+		{ return false; }
 #endif
 
 extern void __attribute__((format(printf, 1, 2)))
@@ -197,11 +210,6 @@ extern void __attribute__((format(printf, 1, 2)))
 
 unsigned long int_sqrt(unsigned long);
 
-extern int printk_ratelimit(void);
-extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
-extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
-				unsigned int interval_msec);
-
 static inline void console_silent(void)
 {
 	console_loglevel = 0;
diff --git a/kernel/printk.c b/kernel/printk.c
index e95e7c6e7b04..bee36100f110 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1251,6 +1251,7 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 	return;
 }
 
+#if defined CONFIG_PRINTK
 /*
  * printk rate limiting, lifted from the networking subsystem.
  *
@@ -1320,3 +1321,4 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 	return false;
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
+#endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a14fd29a7a92..d41ef6b4cf72 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -73,8 +73,6 @@ extern int suid_dumpable;
 extern char core_pattern[];
 extern int pid_max;
 extern int min_free_kbytes;
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
@@ -490,14 +488,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-	{
-		.ctl_name	= KERN_PRINTK,
-		.procname	= "printk",
-		.data		= &console_loglevel,
-		.maxlen		= 4*sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
 #ifdef CONFIG_KMOD
 	{
 		.ctl_name	= KERN_MODPROBE,
@@ -644,6 +634,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#if defined CONFIG_PRINTK
+	{
+		.ctl_name	= KERN_PRINTK,
+		.procname	= "printk",
+		.data		= &console_loglevel,
+		.maxlen		= 4*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{
 		.ctl_name	= KERN_PRINTK_RATELIMIT,
 		.procname	= "printk_ratelimit",
@@ -661,6 +660,7 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#endif
 	{
 		.ctl_name	= KERN_NGROUPS_MAX,
 		.procname	= "ngroups_max",
-- 
cgit 


From e542059884bb6d651d7ffc64eacedbab2b64078c Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 8 Feb 2008 04:21:31 -0800
Subject: drop linux/ufs_fs.h from userspace export and relocate it to
 fs/ufs/ufs_fs.h

Per previous discussions about cleaning up ufs_fs.h, people just want
this straight up dropped from userspace export.  The only remaining
consumer (silo) has been fixed a while ago to not rely on this header.
This allows use to move it completely from include/linux/ to fs/ufs/
seeing as how the only in-kernel consumer is fs/ufs/.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Jan Kara <jack@ucw.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/balloc.c        |   2 +-
 fs/ufs/cylinder.c      |   2 +-
 fs/ufs/dir.c           |   2 +-
 fs/ufs/file.c          |   2 +-
 fs/ufs/ialloc.c        |   2 +-
 fs/ufs/inode.c         |   2 +-
 fs/ufs/namei.c         |   3 +-
 fs/ufs/super.c         |   2 +-
 fs/ufs/symlink.c       |   3 +-
 fs/ufs/truncate.c      |   2 +-
 fs/ufs/ufs_fs.h        | 947 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ufs/util.c          |   2 +-
 include/linux/Kbuild   |   1 -
 include/linux/ufs_fs.h | 953 -------------------------------------------------
 14 files changed, 960 insertions(+), 965 deletions(-)
 create mode 100644 fs/ufs/ufs_fs.h
 delete mode 100644 include/linux/ufs_fs.h

(limited to 'include/linux')

diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index f63a09ce8683..1fca381f0ce2 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/stat.h>
 #include <linux/time.h>
 #include <linux/string.h>
@@ -19,6 +18,7 @@
 #include <linux/bitops.h>
 #include <asm/byteorder.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/fs/ufs/cylinder.c b/fs/ufs/cylinder.c
index 2a815665644f..b4676322ddb6 100644
--- a/fs/ufs/cylinder.c
+++ b/fs/ufs/cylinder.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/time.h>
 #include <linux/stat.h>
 #include <linux/string.h>
@@ -17,6 +16,7 @@
 
 #include <asm/byteorder.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index aaf2878305ce..ef563fc8d72c 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -18,9 +18,9 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/swap.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index a46c97bf023f..625ef17c6f83 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,9 +24,9 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/buffer_head.h>	/* for sync_mapping_buffers() */
 
+#include "ufs_fs.h"
 #include "ufs.h"
 
 
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 7e260bc0d94f..ac181f6806a3 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -24,7 +24,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/time.h>
 #include <linux/stat.h>
 #include <linux/string.h>
@@ -34,6 +33,7 @@
 #include <linux/bitops.h>
 #include <asm/byteorder.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 489f26bc26d9..5446b888fc8e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -30,7 +30,6 @@
 
 #include <linux/errno.h>
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/time.h>
 #include <linux/stat.h>
 #include <linux/string.h>
@@ -38,6 +37,7 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 747a4de6c695..e3a9b1fac75a 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -29,8 +29,9 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/smp_lock.h>
+
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "util.h"
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index d18ccf36ba34..85b22b5977fa 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,7 +76,6 @@
 
 #include <linux/errno.h>
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/stat.h>
@@ -91,6 +90,7 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index 43ac10e75a4a..c0156eda44bc 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -27,7 +27,8 @@
 
 #include <linux/fs.h>
 #include <linux/namei.h>
-#include <linux/ufs_fs.h>
+
+#include "ufs_fs.h"
 #include "ufs.h"
 
 
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 311ded34c2b2..41dd431ce228 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -36,7 +36,6 @@
 
 #include <linux/errno.h>
 #include <linux/fs.h>
-#include <linux/ufs_fs.h>
 #include <linux/fcntl.h>
 #include <linux/time.h>
 #include <linux/stat.h>
@@ -46,6 +45,7 @@
 #include <linux/blkdev.h>
 #include <linux/sched.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
new file mode 100644
index 000000000000..54bde1895a80
--- /dev/null
+++ b/fs/ufs/ufs_fs.h
@@ -0,0 +1,947 @@
+/*
+ *  linux/include/linux/ufs_fs.h
+ *
+ * Copyright (C) 1996
+ * Adrian Rodriguez (adrian@franklins-tower.rutgers.edu)
+ * Laboratory for Computer Science Research Computing Facility
+ * Rutgers, The State University of New Jersey
+ *
+ * Clean swab support by Fare <fare@tunes.org>
+ * just hope no one is using NNUUXXI on __?64 structure elements
+ * 64-bit clean thanks to Maciej W. Rozycki <macro@ds2.pg.gda.pl>
+ *
+ * 4.4BSD (FreeBSD) support added on February 1st 1998 by
+ * Niels Kristian Bech Jensen <nkbj@image.dk> partially based
+ * on code by Martin von Loewis <martin@mira.isdn.cs.tu-berlin.de>.
+ *
+ * NeXTstep support added on February 5th 1998 by
+ * Niels Kristian Bech Jensen <nkbj@image.dk>.
+ *
+ * Write support by Daniel Pirkl <daniel.pirkl@email.cz>
+ *
+ * HP/UX hfs filesystem support added by
+ * Martin K. Petersen <mkp@mkp.net>, August 1999
+ *
+ * UFS2 (of FreeBSD 5.x) support added by
+ * Niraj Kumar <niraj17@iitbombay.org>  , Jan 2004
+ *
+ */
+
+#ifndef __LINUX_UFS_FS_H
+#define __LINUX_UFS_FS_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/fs.h>
+
+#include <asm/div64.h>
+typedef __u64 __bitwise __fs64;
+typedef __u32 __bitwise __fs32;
+typedef __u16 __bitwise __fs16;
+
+#define UFS_BBLOCK 0
+#define UFS_BBSIZE 8192
+#define UFS_SBLOCK 8192
+#define UFS_SBSIZE 8192
+
+#define UFS_SECTOR_SIZE 512
+#define UFS_SECTOR_BITS 9
+#define UFS_MAGIC  0x00011954
+#define UFS2_MAGIC 0x19540119
+#define UFS_CIGAM  0x54190100 /* byteswapped MAGIC */
+
+/* Copied from FreeBSD */
+/*
+ * Each disk drive contains some number of filesystems.
+ * A filesystem consists of a number of cylinder groups.
+ * Each cylinder group has inodes and data.
+ *
+ * A filesystem is described by its super-block, which in turn
+ * describes the cylinder groups.  The super-block is critical
+ * data and is replicated in each cylinder group to protect against
+ * catastrophic loss.  This is done at `newfs' time and the critical
+ * super-block data does not change, so the copies need not be
+ * referenced further unless disaster strikes.
+ *
+ * For filesystem fs, the offsets of the various blocks of interest
+ * are given in the super block as:
+ *      [fs->fs_sblkno]         Super-block
+ *      [fs->fs_cblkno]         Cylinder group block
+ *      [fs->fs_iblkno]         Inode blocks
+ *      [fs->fs_dblkno]         Data blocks
+ * The beginning of cylinder group cg in fs, is given by
+ * the ``cgbase(fs, cg)'' macro.
+ *
+ * Depending on the architecture and the media, the superblock may
+ * reside in any one of four places. For tiny media where every block
+ * counts, it is placed at the very front of the partition. Historically,
+ * UFS1 placed it 8K from the front to leave room for the disk label and
+ * a small bootstrap. For UFS2 it got moved to 64K from the front to leave
+ * room for the disk label and a bigger bootstrap, and for really piggy
+ * systems we check at 256K from the front if the first three fail. In
+ * all cases the size of the superblock will be SBLOCKSIZE. All values are
+ * given in byte-offset form, so they do not imply a sector size. The
+ * SBLOCKSEARCH specifies the order in which the locations should be searched.
+ */
+#define SBLOCK_FLOPPY        0
+#define SBLOCK_UFS1       8192
+#define SBLOCK_UFS2      65536
+#define SBLOCK_PIGGY    262144
+#define SBLOCKSIZE        8192
+#define SBLOCKSEARCH \
+        { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 }
+
+
+/* HP specific MAGIC values */
+
+#define UFS_MAGIC_LFN   0x00095014 /* fs supports filenames > 14 chars */
+#define UFS_CIGAM_LFN   0x14500900 /* srahc 41 < semanelif stroppus sf */
+
+#define UFS_MAGIC_SEC   0x00612195 /* B1 security fs */
+#define UFS_CIGAM_SEC   0x95216100
+
+#define UFS_MAGIC_FEA   0x00195612 /* fs_featurebits supported */
+#define UFS_CIGAM_FEA   0x12561900
+
+#define UFS_MAGIC_4GB   0x05231994 /* fs > 4 GB && fs_featurebits */
+#define UFS_CIGAM_4GB   0x94192305
+
+/* Seems somebody at HP goofed here. B1 and lfs are both 0x2 !?! */
+#define UFS_FSF_LFN     0x00000001 /* long file names */
+#define UFS_FSF_B1      0x00000002 /* B1 security */
+#define UFS_FSF_LFS     0x00000002 /* large files */
+#define UFS_FSF_LUID    0x00000004 /* large UIDs */
+
+/* End of HP stuff */
+
+
+#define UFS_BSIZE	8192
+#define UFS_MINBSIZE	4096
+#define UFS_FSIZE	1024
+#define UFS_MAXFRAG	(UFS_BSIZE / UFS_FSIZE)
+
+#define UFS_NDADDR 12
+#define UFS_NINDIR 3
+
+#define UFS_IND_BLOCK	(UFS_NDADDR + 0)
+#define UFS_DIND_BLOCK	(UFS_NDADDR + 1)
+#define UFS_TIND_BLOCK	(UFS_NDADDR + 2)
+
+#define UFS_NDIR_FRAGMENT (UFS_NDADDR << uspi->s_fpbshift)
+#define UFS_IND_FRAGMENT (UFS_IND_BLOCK << uspi->s_fpbshift)
+#define UFS_DIND_FRAGMENT (UFS_DIND_BLOCK << uspi->s_fpbshift)
+#define UFS_TIND_FRAGMENT (UFS_TIND_BLOCK << uspi->s_fpbshift)
+
+#define UFS_ROOTINO 2
+#define UFS_FIRST_INO (UFS_ROOTINO + 1)
+
+#define UFS_USEEFT  ((__u16)65535)
+
+#define UFS_FSOK      0x7c269d38
+#define UFS_FSACTIVE  ((__s8)0x00)
+#define UFS_FSCLEAN   ((__s8)0x01)
+#define UFS_FSSTABLE  ((__s8)0x02)
+#define UFS_FSOSF1    ((__s8)0x03)	/* is this correct for DEC OSF/1? */
+#define UFS_FSBAD     ((__s8)0xff)
+
+/* From here to next blank line, s_flags for ufs_sb_info */
+/* directory entry encoding */
+#define UFS_DE_MASK		0x00000010	/* mask for the following */
+#define UFS_DE_OLD		0x00000000
+#define UFS_DE_44BSD		0x00000010
+/* uid encoding */
+#define UFS_UID_MASK		0x00000060	/* mask for the following */
+#define UFS_UID_OLD		0x00000000
+#define UFS_UID_44BSD		0x00000020
+#define UFS_UID_EFT		0x00000040
+/* superblock state encoding */
+#define UFS_ST_MASK		0x00000700	/* mask for the following */
+#define UFS_ST_OLD		0x00000000
+#define UFS_ST_44BSD		0x00000100
+#define UFS_ST_SUN		0x00000200 /* Solaris */
+#define UFS_ST_SUNOS		0x00000300
+#define UFS_ST_SUNx86		0x00000400 /* Solaris x86 */
+/*cylinder group encoding */
+#define UFS_CG_MASK		0x00003000	/* mask for the following */
+#define UFS_CG_OLD		0x00000000
+#define UFS_CG_44BSD		0x00002000
+#define UFS_CG_SUN		0x00001000
+/* filesystem type encoding */
+#define UFS_TYPE_MASK		0x00010000	/* mask for the following */
+#define UFS_TYPE_UFS1		0x00000000
+#define UFS_TYPE_UFS2		0x00010000
+
+
+/* fs_inodefmt options */
+#define UFS_42INODEFMT	-1
+#define UFS_44INODEFMT	2
+
+/*
+ * MINFREE gives the minimum acceptable percentage of file system
+ * blocks which may be free. If the freelist drops below this level
+ * only the superuser may continue to allocate blocks. This may
+ * be set to 0 if no reserve of free blocks is deemed necessary,
+ * however throughput drops by fifty percent if the file system
+ * is run at between 95% and 100% full; thus the minimum default
+ * value of fs_minfree is 5%. However, to get good clustering
+ * performance, 10% is a better choice. hence we use 10% as our
+ * default value. With 10% free space, fragmentation is not a
+ * problem, so we choose to optimize for time.
+ */
+#define UFS_MINFREE         5
+#define UFS_DEFAULTOPT      UFS_OPTTIME
+
+/*
+ * Turn file system block numbers into disk block addresses.
+ * This maps file system blocks to device size blocks.
+ */
+#define ufs_fsbtodb(uspi, b)	((b) << (uspi)->s_fsbtodb)
+#define	ufs_dbtofsb(uspi, b)	((b) >> (uspi)->s_fsbtodb)
+
+/*
+ * Cylinder group macros to locate things in cylinder groups.
+ * They calc file system addresses of cylinder group data structures.
+ */
+#define	ufs_cgbase(c)	(uspi->s_fpg * (c))
+#define ufs_cgstart(c)	((uspi)->fs_magic == UFS2_MAGIC ?  ufs_cgbase(c) : \
+	(ufs_cgbase(c)  + uspi->s_cgoffset * ((c) & ~uspi->s_cgmask)))
+#define	ufs_cgsblock(c)	(ufs_cgstart(c) + uspi->s_sblkno)	/* super blk */
+#define	ufs_cgcmin(c)	(ufs_cgstart(c) + uspi->s_cblkno)	/* cg block */
+#define	ufs_cgimin(c)	(ufs_cgstart(c) + uspi->s_iblkno)	/* inode blk */
+#define	ufs_cgdmin(c)	(ufs_cgstart(c) + uspi->s_dblkno)	/* 1st data */
+
+/*
+ * Macros for handling inode numbers:
+ *     inode number to file system block offset.
+ *     inode number to cylinder group number.
+ *     inode number to file system block address.
+ */
+#define	ufs_inotocg(x)		((x) / uspi->s_ipg)
+#define	ufs_inotocgoff(x)	((x) % uspi->s_ipg)
+#define	ufs_inotofsba(x)	(((u64)ufs_cgimin(ufs_inotocg(x))) + ufs_inotocgoff(x) / uspi->s_inopf)
+#define	ufs_inotofsbo(x)	((x) % uspi->s_inopf)
+
+/*
+ * Compute the cylinder and rotational position of a cyl block addr.
+ */
+#define ufs_cbtocylno(bno) \
+	((bno) * uspi->s_nspf / uspi->s_spc)
+#define ufs_cbtorpos(bno) \
+	((((bno) * uspi->s_nspf % uspi->s_spc / uspi->s_nsect \
+	* uspi->s_trackskew + (bno) * uspi->s_nspf % uspi->s_spc \
+	% uspi->s_nsect * uspi->s_interleave) % uspi->s_nsect \
+	* uspi->s_nrpos) / uspi->s_npsect)
+
+/*
+ * The following macros optimize certain frequently calculated
+ * quantities by using shifts and masks in place of divisions
+ * modulos and multiplications.
+ */
+#define ufs_blkoff(loc)		((loc) & uspi->s_qbmask)
+#define ufs_fragoff(loc)	((loc) & uspi->s_qfmask)
+#define ufs_lblktosize(blk)	((blk) << uspi->s_bshift)
+#define ufs_lblkno(loc)		((loc) >> uspi->s_bshift)
+#define ufs_numfrags(loc)	((loc) >> uspi->s_fshift)
+#define ufs_blkroundup(size)	(((size) + uspi->s_qbmask) & uspi->s_bmask)
+#define ufs_fragroundup(size)	(((size) + uspi->s_qfmask) & uspi->s_fmask)
+#define ufs_fragstoblks(frags)	((frags) >> uspi->s_fpbshift)
+#define ufs_blkstofrags(blks)	((blks) << uspi->s_fpbshift)
+#define ufs_fragnum(fsb)	((fsb) & uspi->s_fpbmask)
+#define ufs_blknum(fsb)		((fsb) & ~uspi->s_fpbmask)
+
+#define	UFS_MAXNAMLEN 255
+#define UFS_MAXMNTLEN 512
+#define UFS2_MAXMNTLEN 468
+#define UFS2_MAXVOLLEN 32
+#define UFS_MAXCSBUFS 31
+#define UFS_LINK_MAX 32000
+/*
+#define	UFS2_NOCSPTRS	((128 / sizeof(void *)) - 4)
+*/
+#define	UFS2_NOCSPTRS	28
+
+/*
+ * UFS_DIR_PAD defines the directory entries boundaries
+ * (must be a multiple of 4)
+ */
+#define UFS_DIR_PAD			4
+#define UFS_DIR_ROUND			(UFS_DIR_PAD - 1)
+#define UFS_DIR_REC_LEN(name_len)	(((name_len) + 1 + 8 + UFS_DIR_ROUND) & ~UFS_DIR_ROUND)
+
+struct ufs_timeval {
+	__fs32	tv_sec;
+	__fs32	tv_usec;
+};
+
+struct ufs_dir_entry {
+	__fs32  d_ino;			/* inode number of this entry */
+	__fs16  d_reclen;		/* length of this entry */
+	union {
+		__fs16	d_namlen;		/* actual length of d_name */
+		struct {
+			__u8	d_type;		/* file type */
+			__u8	d_namlen;	/* length of string in d_name */
+		} d_44;
+	} d_u;
+	__u8	d_name[UFS_MAXNAMLEN + 1];	/* file name */
+};
+
+struct ufs_csum {
+	__fs32	cs_ndir;	/* number of directories */
+	__fs32	cs_nbfree;	/* number of free blocks */
+	__fs32	cs_nifree;	/* number of free inodes */
+	__fs32	cs_nffree;	/* number of free frags */
+};
+struct ufs2_csum_total {
+	__fs64	cs_ndir;	/* number of directories */
+	__fs64	cs_nbfree;	/* number of free blocks */
+	__fs64	cs_nifree;	/* number of free inodes */
+	__fs64	cs_nffree;	/* number of free frags */
+	__fs64   cs_numclusters;	/* number of free clusters */
+	__fs64   cs_spare[3];	/* future expansion */
+};
+
+struct ufs_csum_core {
+	__u64	cs_ndir;	/* number of directories */
+	__u64	cs_nbfree;	/* number of free blocks */
+	__u64	cs_nifree;	/* number of free inodes */
+	__u64	cs_nffree;	/* number of free frags */
+	__u64   cs_numclusters;	/* number of free clusters */
+};
+
+/*
+ * File system flags
+ */
+#define UFS_UNCLEAN      0x01    /* file system not clean at mount (unused) */
+#define UFS_DOSOFTDEP    0x02    /* file system using soft dependencies */
+#define UFS_NEEDSFSCK    0x04    /* needs sync fsck (FreeBSD compat, unused) */
+#define UFS_INDEXDIRS    0x08    /* kernel supports indexed directories */
+#define UFS_ACLS         0x10    /* file system has ACLs enabled */
+#define UFS_MULTILABEL   0x20    /* file system is MAC multi-label */
+#define UFS_FLAGS_UPDATED 0x80   /* flags have been moved to new location */
+
+#if 0
+/*
+ * This is the actual superblock, as it is laid out on the disk.
+ * Do NOT use this structure, because of sizeof(ufs_super_block) > 512 and
+ * it may occupy several blocks, use
+ * struct ufs_super_block_(first,second,third) instead.
+ */
+struct ufs_super_block {
+	union {
+		struct {
+			__fs32	fs_link;	/* UNUSED */
+		} fs_42;
+		struct {
+			__fs32	fs_state;	/* file system state flag */
+		} fs_sun;
+	} fs_u0;
+	__fs32	fs_rlink;	/* UNUSED */
+	__fs32	fs_sblkno;	/* addr of super-block in filesys */
+	__fs32	fs_cblkno;	/* offset of cyl-block in filesys */
+	__fs32	fs_iblkno;	/* offset of inode-blocks in filesys */
+	__fs32	fs_dblkno;	/* offset of first data after cg */
+	__fs32	fs_cgoffset;	/* cylinder group offset in cylinder */
+	__fs32	fs_cgmask;	/* used to calc mod fs_ntrak */
+	__fs32	fs_time;	/* last time written -- time_t */
+	__fs32	fs_size;	/* number of blocks in fs */
+	__fs32	fs_dsize;	/* number of data blocks in fs */
+	__fs32	fs_ncg;		/* number of cylinder groups */
+	__fs32	fs_bsize;	/* size of basic blocks in fs */
+	__fs32	fs_fsize;	/* size of frag blocks in fs */
+	__fs32	fs_frag;	/* number of frags in a block in fs */
+/* these are configuration parameters */
+	__fs32	fs_minfree;	/* minimum percentage of free blocks */
+	__fs32	fs_rotdelay;	/* num of ms for optimal next block */
+	__fs32	fs_rps;		/* disk revolutions per second */
+/* these fields can be computed from the others */
+	__fs32	fs_bmask;	/* ``blkoff'' calc of blk offsets */
+	__fs32	fs_fmask;	/* ``fragoff'' calc of frag offsets */
+	__fs32	fs_bshift;	/* ``lblkno'' calc of logical blkno */
+	__fs32	fs_fshift;	/* ``numfrags'' calc number of frags */
+/* these are configuration parameters */
+	__fs32	fs_maxcontig;	/* max number of contiguous blks */
+	__fs32	fs_maxbpg;	/* max number of blks per cyl group */
+/* these fields can be computed from the others */
+	__fs32	fs_fragshift;	/* block to frag shift */
+	__fs32	fs_fsbtodb;	/* fsbtodb and dbtofsb shift constant */
+	__fs32	fs_sbsize;	/* actual size of super block */
+	__fs32	fs_csmask;	/* csum block offset */
+	__fs32	fs_csshift;	/* csum block number */
+	__fs32	fs_nindir;	/* value of NINDIR */
+	__fs32	fs_inopb;	/* value of INOPB */
+	__fs32	fs_nspf;	/* value of NSPF */
+/* yet another configuration parameter */
+	__fs32	fs_optim;	/* optimization preference, see below */
+/* these fields are derived from the hardware */
+	union {
+		struct {
+			__fs32	fs_npsect;	/* # sectors/track including spares */
+		} fs_sun;
+		struct {
+			__fs32	fs_state;	/* file system state time stamp */
+		} fs_sunx86;
+	} fs_u1;
+	__fs32	fs_interleave;	/* hardware sector interleave */
+	__fs32	fs_trackskew;	/* sector 0 skew, per track */
+/* a unique id for this filesystem (currently unused and unmaintained) */
+/* In 4.3 Tahoe this space is used by fs_headswitch and fs_trkseek */
+/* Neither of those fields is used in the Tahoe code right now but */
+/* there could be problems if they are.                            */
+	__fs32	fs_id[2];	/* file system id */
+/* sizes determined by number of cylinder groups and their sizes */
+	__fs32	fs_csaddr;	/* blk addr of cyl grp summary area */
+	__fs32	fs_cssize;	/* size of cyl grp summary area */
+	__fs32	fs_cgsize;	/* cylinder group size */
+/* these fields are derived from the hardware */
+	__fs32	fs_ntrak;	/* tracks per cylinder */
+	__fs32	fs_nsect;	/* sectors per track */
+	__fs32	fs_spc;		/* sectors per cylinder */
+/* this comes from the disk driver partitioning */
+	__fs32	fs_ncyl;	/* cylinders in file system */
+/* these fields can be computed from the others */
+	__fs32	fs_cpg;		/* cylinders per group */
+	__fs32	fs_ipg;		/* inodes per cylinder group */
+	__fs32	fs_fpg;		/* blocks per group * fs_frag */
+/* this data must be re-computed after crashes */
+	struct ufs_csum fs_cstotal;	/* cylinder summary information */
+/* these fields are cleared at mount time */
+	__s8	fs_fmod;	/* super block modified flag */
+	__s8	fs_clean;	/* file system is clean flag */
+	__s8	fs_ronly;	/* mounted read-only flag */
+	__s8	fs_flags;
+	union {
+		struct {
+			__s8	fs_fsmnt[UFS_MAXMNTLEN];/* name mounted on */
+			__fs32	fs_cgrotor;	/* last cg searched */
+			__fs32	fs_csp[UFS_MAXCSBUFS];/*list of fs_cs info buffers */
+			__fs32	fs_maxcluster;
+			__fs32	fs_cpc;		/* cyl per cycle in postbl */
+			__fs16	fs_opostbl[16][8]; /* old rotation block list head */
+		} fs_u1;
+		struct {
+			__s8  fs_fsmnt[UFS2_MAXMNTLEN];	/* name mounted on */
+			__u8   fs_volname[UFS2_MAXVOLLEN]; /* volume name */
+			__fs64  fs_swuid;		/* system-wide uid */
+			__fs32  fs_pad;	/* due to alignment of fs_swuid */
+			__fs32   fs_cgrotor;     /* last cg searched */
+			__fs32   fs_ocsp[UFS2_NOCSPTRS]; /*list of fs_cs info buffers */
+			__fs32   fs_contigdirs;/*# of contiguously allocated dirs */
+			__fs32   fs_csp;	/* cg summary info buffer for fs_cs */
+			__fs32   fs_maxcluster;
+			__fs32   fs_active;/* used by snapshots to track fs */
+			__fs32   fs_old_cpc;	/* cyl per cycle in postbl */
+			__fs32   fs_maxbsize;/*maximum blocking factor permitted */
+			__fs64   fs_sparecon64[17];/*old rotation block list head */
+			__fs64   fs_sblockloc; /* byte offset of standard superblock */
+			struct  ufs2_csum_total fs_cstotal;/*cylinder summary information*/
+			struct  ufs_timeval    fs_time;		/* last time written */
+			__fs64    fs_size;		/* number of blocks in fs */
+			__fs64    fs_dsize;	/* number of data blocks in fs */
+			__fs64   fs_csaddr;	/* blk addr of cyl grp summary area */
+			__fs64    fs_pendingblocks;/* blocks in process of being freed */
+			__fs32    fs_pendinginodes;/*inodes in process of being freed */
+		} fs_u2;
+	}  fs_u11;
+	union {
+		struct {
+			__fs32	fs_sparecon[53];/* reserved for future constants */
+			__fs32	fs_reclaim;
+			__fs32	fs_sparecon2[1];
+			__fs32	fs_state;	/* file system state time stamp */
+			__fs32	fs_qbmask[2];	/* ~usb_bmask */
+			__fs32	fs_qfmask[2];	/* ~usb_fmask */
+		} fs_sun;
+		struct {
+			__fs32	fs_sparecon[53];/* reserved for future constants */
+			__fs32	fs_reclaim;
+			__fs32	fs_sparecon2[1];
+			__fs32	fs_npsect;	/* # sectors/track including spares */
+			__fs32	fs_qbmask[2];	/* ~usb_bmask */
+			__fs32	fs_qfmask[2];	/* ~usb_fmask */
+		} fs_sunx86;
+		struct {
+			__fs32	fs_sparecon[50];/* reserved for future constants */
+			__fs32	fs_contigsumsize;/* size of cluster summary array */
+			__fs32	fs_maxsymlinklen;/* max length of an internal symlink */
+			__fs32	fs_inodefmt;	/* format of on-disk inodes */
+			__fs32	fs_maxfilesize[2];	/* max representable file size */
+			__fs32	fs_qbmask[2];	/* ~usb_bmask */
+			__fs32	fs_qfmask[2];	/* ~usb_fmask */
+			__fs32	fs_state;	/* file system state time stamp */
+		} fs_44;
+	} fs_u2;
+	__fs32	fs_postblformat;	/* format of positional layout tables */
+	__fs32	fs_nrpos;		/* number of rotational positions */
+	__fs32	fs_postbloff;		/* (__s16) rotation block list head */
+	__fs32	fs_rotbloff;		/* (__u8) blocks for each rotation */
+	__fs32	fs_magic;		/* magic number */
+	__u8	fs_space[1];		/* list of blocks for each rotation */
+};
+#endif/*struct ufs_super_block*/
+
+/*
+ * Preference for optimization.
+ */
+#define UFS_OPTTIME	0	/* minimize allocation time */
+#define UFS_OPTSPACE	1	/* minimize disk fragmentation */
+
+/*
+ * Rotational layout table format types
+ */
+#define UFS_42POSTBLFMT		-1	/* 4.2BSD rotational table format */
+#define UFS_DYNAMICPOSTBLFMT	1	/* dynamic rotational table format */
+
+/*
+ * Convert cylinder group to base address of its global summary info.
+ */
+#define fs_cs(indx) s_csp[(indx)]
+
+/*
+ * Cylinder group block for a file system.
+ *
+ * Writable fields in the cylinder group are protected by the associated
+ * super block lock fs->fs_lock.
+ */
+#define	CG_MAGIC	0x090255
+#define ufs_cg_chkmagic(sb, ucg) \
+	(fs32_to_cpu((sb), (ucg)->cg_magic) == CG_MAGIC)
+/*
+ * Macros for access to old cylinder group array structures
+ */
+#define ufs_ocg_blktot(sb, ucg)      fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_btot)
+#define ufs_ocg_blks(sb, ucg, cylno) fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_b[cylno])
+#define ufs_ocg_inosused(sb, ucg)    fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_iused)
+#define ufs_ocg_blksfree(sb, ucg)    fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_free)
+#define ufs_ocg_chkmagic(sb, ucg) \
+	(fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_magic) == CG_MAGIC)
+
+/*
+ * size of this structure is 172 B
+ */
+struct	ufs_cylinder_group {
+	__fs32	cg_link;		/* linked list of cyl groups */
+	__fs32	cg_magic;		/* magic number */
+	__fs32	cg_time;		/* time last written */
+	__fs32	cg_cgx;			/* we are the cgx'th cylinder group */
+	__fs16	cg_ncyl;		/* number of cyl's this cg */
+	__fs16	cg_niblk;		/* number of inode blocks this cg */
+	__fs32	cg_ndblk;		/* number of data blocks this cg */
+	struct	ufs_csum cg_cs;		/* cylinder summary information */
+	__fs32	cg_rotor;		/* position of last used block */
+	__fs32	cg_frotor;		/* position of last used frag */
+	__fs32	cg_irotor;		/* position of last used inode */
+	__fs32	cg_frsum[UFS_MAXFRAG];	/* counts of available frags */
+	__fs32	cg_btotoff;		/* (__u32) block totals per cylinder */
+	__fs32	cg_boff;		/* (short) free block positions */
+	__fs32	cg_iusedoff;		/* (char) used inode map */
+	__fs32	cg_freeoff;		/* (u_char) free block map */
+	__fs32	cg_nextfreeoff;		/* (u_char) next available space */
+	union {
+		struct {
+			__fs32	cg_clustersumoff;	/* (u_int32) counts of avail clusters */
+			__fs32	cg_clusteroff;		/* (u_int8) free cluster map */
+			__fs32	cg_nclusterblks;	/* number of clusters this cg */
+			__fs32	cg_sparecon[13];	/* reserved for future use */
+		} cg_44;
+		struct {
+			__fs32	cg_clustersumoff;/* (u_int32) counts of avail clusters */
+			__fs32	cg_clusteroff;	/* (u_int8) free cluster map */
+			__fs32	cg_nclusterblks;/* number of clusters this cg */
+			__fs32   cg_niblk; /* number of inode blocks this cg */
+			__fs32   cg_initediblk;	/* last initialized inode */
+			__fs32   cg_sparecon32[3];/* reserved for future use */
+			__fs64   cg_time;	/* time last written */
+			__fs64	cg_sparecon[3];	/* reserved for future use */
+		} cg_u2;
+		__fs32	cg_sparecon[16];	/* reserved for future use */
+	} cg_u;
+	__u8	cg_space[1];		/* space for cylinder group maps */
+/* actually longer */
+};
+
+/* Historic Cylinder group info */
+struct ufs_old_cylinder_group {
+	__fs32	cg_link;		/* linked list of cyl groups */
+	__fs32	cg_rlink;		/* for incore cyl groups     */
+	__fs32	cg_time;		/* time last written */
+	__fs32	cg_cgx;			/* we are the cgx'th cylinder group */
+	__fs16	cg_ncyl;		/* number of cyl's this cg */
+	__fs16	cg_niblk;		/* number of inode blocks this cg */
+	__fs32	cg_ndblk;		/* number of data blocks this cg */
+	struct	ufs_csum cg_cs;		/* cylinder summary information */
+	__fs32	cg_rotor;		/* position of last used block */
+	__fs32	cg_frotor;		/* position of last used frag */
+	__fs32	cg_irotor;		/* position of last used inode */
+	__fs32	cg_frsum[8];		/* counts of available frags */
+	__fs32	cg_btot[32];		/* block totals per cylinder */
+	__fs16	cg_b[32][8];		/* positions of free blocks */
+	__u8	cg_iused[256];		/* used inode map */
+	__fs32	cg_magic;		/* magic number */
+	__u8	cg_free[1];		/* free block map */
+/* actually longer */
+};
+
+/*
+ * structure of an on-disk inode
+ */
+struct ufs_inode {
+	__fs16	ui_mode;		/*  0x0 */
+	__fs16	ui_nlink;		/*  0x2 */
+	union {
+		struct {
+			__fs16	ui_suid;	/*  0x4 */
+			__fs16	ui_sgid;	/*  0x6 */
+		} oldids;
+		__fs32	ui_inumber;		/*  0x4 lsf: inode number */
+		__fs32	ui_author;		/*  0x4 GNU HURD: author */
+	} ui_u1;
+	__fs64	ui_size;		/*  0x8 */
+	struct ufs_timeval ui_atime;	/* 0x10 access */
+	struct ufs_timeval ui_mtime;	/* 0x18 modification */
+	struct ufs_timeval ui_ctime;	/* 0x20 creation */
+	union {
+		struct {
+			__fs32	ui_db[UFS_NDADDR];/* 0x28 data blocks */
+			__fs32	ui_ib[UFS_NINDIR];/* 0x58 indirect blocks */
+		} ui_addr;
+		__u8	ui_symlink[4*(UFS_NDADDR+UFS_NINDIR)];/* 0x28 fast symlink */
+	} ui_u2;
+	__fs32	ui_flags;		/* 0x64 immutable, append-only... */
+	__fs32	ui_blocks;		/* 0x68 blocks in use */
+	__fs32	ui_gen;			/* 0x6c like ext2 i_version, for NFS support */
+	union {
+		struct {
+			__fs32	ui_shadow;	/* 0x70 shadow inode with security data */
+			__fs32	ui_uid;		/* 0x74 long EFT version of uid */
+			__fs32	ui_gid;		/* 0x78 long EFT version of gid */
+			__fs32	ui_oeftflag;	/* 0x7c reserved */
+		} ui_sun;
+		struct {
+			__fs32	ui_uid;		/* 0x70 File owner */
+			__fs32	ui_gid;		/* 0x74 File group */
+			__fs32	ui_spare[2];	/* 0x78 reserved */
+		} ui_44;
+		struct {
+			__fs32	ui_uid;		/* 0x70 */
+			__fs32	ui_gid;		/* 0x74 */
+			__fs16	ui_modeh;	/* 0x78 mode high bits */
+			__fs16	ui_spare;	/* 0x7A unused */
+			__fs32	ui_trans;	/* 0x7c filesystem translator */
+		} ui_hurd;
+	} ui_u3;
+};
+
+#define UFS_NXADDR  2            /* External addresses in inode. */
+struct ufs2_inode {
+	__fs16     ui_mode;        /*   0: IFMT, permissions; see below. */
+	__fs16     ui_nlink;       /*   2: File link count. */
+	__fs32     ui_uid;         /*   4: File owner. */
+	__fs32     ui_gid;         /*   8: File group. */
+	__fs32     ui_blksize;     /*  12: Inode blocksize. */
+	__fs64     ui_size;        /*  16: File byte count. */
+	__fs64     ui_blocks;      /*  24: Bytes actually held. */
+	__fs64   ui_atime;       /*  32: Last access time. */
+	__fs64   ui_mtime;       /*  40: Last modified time. */
+	__fs64   ui_ctime;       /*  48: Last inode change time. */
+	__fs64   ui_birthtime;   /*  56: Inode creation time. */
+	__fs32     ui_mtimensec;   /*  64: Last modified time. */
+	__fs32     ui_atimensec;   /*  68: Last access time. */
+	__fs32     ui_ctimensec;   /*  72: Last inode change time. */
+	__fs32     ui_birthnsec;   /*  76: Inode creation time. */
+	__fs32     ui_gen;         /*  80: Generation number. */
+	__fs32     ui_kernflags;   /*  84: Kernel flags. */
+	__fs32     ui_flags;       /*  88: Status flags (chflags). */
+	__fs32     ui_extsize;     /*  92: External attributes block. */
+	__fs64     ui_extb[UFS_NXADDR];/*  96: External attributes block. */
+	union {
+		struct {
+			__fs64     ui_db[UFS_NDADDR]; /* 112: Direct disk blocks. */
+			__fs64     ui_ib[UFS_NINDIR];/* 208: Indirect disk blocks.*/
+		} ui_addr;
+	__u8	ui_symlink[2*4*(UFS_NDADDR+UFS_NINDIR)];/* 0x28 fast symlink */
+	} ui_u2;
+	__fs64     ui_spare[3];    /* 232: Reserved; currently unused */
+};
+
+
+/* FreeBSD has these in sys/stat.h */
+/* ui_flags that can be set by a file owner */
+#define UFS_UF_SETTABLE   0x0000ffff
+#define UFS_UF_NODUMP     0x00000001  /* do not dump */
+#define UFS_UF_IMMUTABLE  0x00000002  /* immutable (can't "change") */
+#define UFS_UF_APPEND     0x00000004  /* append-only */
+#define UFS_UF_OPAQUE     0x00000008  /* directory is opaque (unionfs) */
+#define UFS_UF_NOUNLINK   0x00000010  /* can't be removed or renamed */
+/* ui_flags that only root can set */
+#define UFS_SF_SETTABLE   0xffff0000
+#define UFS_SF_ARCHIVED   0x00010000  /* archived */
+#define UFS_SF_IMMUTABLE  0x00020000  /* immutable (can't "change") */
+#define UFS_SF_APPEND     0x00040000  /* append-only */
+#define UFS_SF_NOUNLINK   0x00100000  /* can't be removed or renamed */
+
+/*
+ * This structure is used for reading disk structures larger
+ * than the size of fragment.
+ */
+struct ufs_buffer_head {
+	__u64 fragment;			/* first fragment */
+	__u64 count;				/* number of fragments */
+	struct buffer_head * bh[UFS_MAXFRAG];	/* buffers */
+};
+
+struct ufs_cg_private_info {
+	struct ufs_buffer_head c_ubh;
+	__u32	c_cgx;		/* number of cylidner group */
+	__u16	c_ncyl;		/* number of cyl's this cg */
+	__u16	c_niblk;	/* number of inode blocks this cg */
+	__u32	c_ndblk;	/* number of data blocks this cg */
+	__u32	c_rotor;	/* position of last used block */
+	__u32	c_frotor;	/* position of last used frag */
+	__u32	c_irotor;	/* position of last used inode */
+	__u32	c_btotoff;	/* (__u32) block totals per cylinder */
+	__u32	c_boff;		/* (short) free block positions */
+	__u32	c_iusedoff;	/* (char) used inode map */
+	__u32	c_freeoff;	/* (u_char) free block map */
+	__u32	c_nextfreeoff;	/* (u_char) next available space */
+	__u32	c_clustersumoff;/* (u_int32) counts of avail clusters */
+	__u32	c_clusteroff;	/* (u_int8) free cluster map */
+	__u32	c_nclusterblks;	/* number of clusters this cg */
+};
+
+
+struct ufs_sb_private_info {
+	struct ufs_buffer_head s_ubh; /* buffer containing super block */
+	struct ufs_csum_core cs_total;
+	__u32	s_sblkno;	/* offset of super-blocks in filesys */
+	__u32	s_cblkno;	/* offset of cg-block in filesys */
+	__u32	s_iblkno;	/* offset of inode-blocks in filesys */
+	__u32	s_dblkno;	/* offset of first data after cg */
+	__u32	s_cgoffset;	/* cylinder group offset in cylinder */
+	__u32	s_cgmask;	/* used to calc mod fs_ntrak */
+	__u32	s_size;		/* number of blocks (fragments) in fs */
+	__u32	s_dsize;	/* number of data blocks in fs */
+	__u64	s_u2_size;	/* ufs2: number of blocks (fragments) in fs */
+	__u64	s_u2_dsize;	/*ufs2:  number of data blocks in fs */
+	__u32	s_ncg;		/* number of cylinder groups */
+	__u32	s_bsize;	/* size of basic blocks */
+	__u32	s_fsize;	/* size of fragments */
+	__u32	s_fpb;		/* fragments per block */
+	__u32	s_minfree;	/* minimum percentage of free blocks */
+	__u32	s_bmask;	/* `blkoff'' calc of blk offsets */
+	__u32	s_fmask;	/* s_fsize mask */
+	__u32	s_bshift;	/* `lblkno'' calc of logical blkno */
+	__u32   s_fshift;	/* s_fsize shift */
+	__u32	s_fpbshift;	/* fragments per block shift */
+	__u32	s_fsbtodb;	/* fsbtodb and dbtofsb shift constant */
+	__u32	s_sbsize;	/* actual size of super block */
+	__u32   s_csmask;	/* csum block offset */
+	__u32	s_csshift;	/* csum block number */
+	__u32	s_nindir;	/* value of NINDIR */
+	__u32	s_inopb;	/* value of INOPB */
+	__u32	s_nspf;		/* value of NSPF */
+	__u32	s_npsect;	/* # sectors/track including spares */
+	__u32	s_interleave;	/* hardware sector interleave */
+	__u32	s_trackskew;	/* sector 0 skew, per track */
+	__u64	s_csaddr;	/* blk addr of cyl grp summary area */
+	__u32	s_cssize;	/* size of cyl grp summary area */
+	__u32	s_cgsize;	/* cylinder group size */
+	__u32	s_ntrak;	/* tracks per cylinder */
+	__u32	s_nsect;	/* sectors per track */
+	__u32	s_spc;		/* sectors per cylinder */
+	__u32	s_ipg;		/* inodes per cylinder group */
+	__u32	s_fpg;		/* fragments per group */
+	__u32	s_cpc;		/* cyl per cycle in postbl */
+	__s32	s_contigsumsize;/* size of cluster summary array, 44bsd */
+	__s64	s_qbmask;	/* ~usb_bmask */
+	__s64	s_qfmask;	/* ~usb_fmask */
+	__s32	s_postblformat;	/* format of positional layout tables */
+	__s32	s_nrpos;	/* number of rotational positions */
+        __s32	s_postbloff;	/* (__s16) rotation block list head */
+	__s32	s_rotbloff;	/* (__u8) blocks for each rotation */
+
+	__u32	s_fpbmask;	/* fragments per block mask */
+	__u32	s_apb;		/* address per block */
+	__u32	s_2apb;		/* address per block^2 */
+	__u32	s_3apb;		/* address per block^3 */
+	__u32	s_apbmask;	/* address per block mask */
+	__u32	s_apbshift;	/* address per block shift */
+	__u32	s_2apbshift;	/* address per block shift * 2 */
+	__u32	s_3apbshift;	/* address per block shift * 3 */
+	__u32	s_nspfshift;	/* number of sector per fragment shift */
+	__u32	s_nspb;		/* number of sector per block */
+	__u32	s_inopf;	/* inodes per fragment */
+	__u32	s_sbbase;	/* offset of NeXTstep superblock */
+	__u32	s_bpf;		/* bits per fragment */
+	__u32	s_bpfshift;	/* bits per fragment shift*/
+	__u32	s_bpfmask;	/* bits per fragment mask */
+
+	__u32	s_maxsymlinklen;/* upper limit on fast symlinks' size */
+	__s32	fs_magic;       /* filesystem magic */
+	unsigned int s_dirblksize;
+};
+
+/*
+ * Sizes of this structures are:
+ *	ufs_super_block_first	512
+ *	ufs_super_block_second	512
+ *	ufs_super_block_third	356
+ */
+struct ufs_super_block_first {
+	union {
+		struct {
+			__fs32	fs_link;	/* UNUSED */
+		} fs_42;
+		struct {
+			__fs32	fs_state;	/* file system state flag */
+		} fs_sun;
+	} fs_u0;
+	__fs32	fs_rlink;
+	__fs32	fs_sblkno;
+	__fs32	fs_cblkno;
+	__fs32	fs_iblkno;
+	__fs32	fs_dblkno;
+	__fs32	fs_cgoffset;
+	__fs32	fs_cgmask;
+	__fs32	fs_time;
+	__fs32	fs_size;
+	__fs32	fs_dsize;
+	__fs32	fs_ncg;
+	__fs32	fs_bsize;
+	__fs32	fs_fsize;
+	__fs32	fs_frag;
+	__fs32	fs_minfree;
+	__fs32	fs_rotdelay;
+	__fs32	fs_rps;
+	__fs32	fs_bmask;
+	__fs32	fs_fmask;
+	__fs32	fs_bshift;
+	__fs32	fs_fshift;
+	__fs32	fs_maxcontig;
+	__fs32	fs_maxbpg;
+	__fs32	fs_fragshift;
+	__fs32	fs_fsbtodb;
+	__fs32	fs_sbsize;
+	__fs32	fs_csmask;
+	__fs32	fs_csshift;
+	__fs32	fs_nindir;
+	__fs32	fs_inopb;
+	__fs32	fs_nspf;
+	__fs32	fs_optim;
+	union {
+		struct {
+			__fs32	fs_npsect;
+		} fs_sun;
+		struct {
+			__fs32	fs_state;
+		} fs_sunx86;
+	} fs_u1;
+	__fs32	fs_interleave;
+	__fs32	fs_trackskew;
+	__fs32	fs_id[2];
+	__fs32	fs_csaddr;
+	__fs32	fs_cssize;
+	__fs32	fs_cgsize;
+	__fs32	fs_ntrak;
+	__fs32	fs_nsect;
+	__fs32	fs_spc;
+	__fs32	fs_ncyl;
+	__fs32	fs_cpg;
+	__fs32	fs_ipg;
+	__fs32	fs_fpg;
+	struct ufs_csum fs_cstotal;
+	__s8	fs_fmod;
+	__s8	fs_clean;
+	__s8	fs_ronly;
+	__s8	fs_flags;
+	__s8	fs_fsmnt[UFS_MAXMNTLEN - 212];
+
+};
+
+struct ufs_super_block_second {
+	union {
+		struct {
+			__s8	fs_fsmnt[212];
+			__fs32	fs_cgrotor;
+			__fs32	fs_csp[UFS_MAXCSBUFS];
+			__fs32	fs_maxcluster;
+			__fs32	fs_cpc;
+			__fs16	fs_opostbl[82];
+		} fs_u1;
+		struct {
+			__s8  fs_fsmnt[UFS2_MAXMNTLEN - UFS_MAXMNTLEN + 212];
+			__u8   fs_volname[UFS2_MAXVOLLEN];
+			__fs64  fs_swuid;
+			__fs32  fs_pad;
+			__fs32   fs_cgrotor;
+			__fs32   fs_ocsp[UFS2_NOCSPTRS];
+			__fs32   fs_contigdirs;
+			__fs32   fs_csp;
+			__fs32   fs_maxcluster;
+			__fs32   fs_active;
+			__fs32   fs_old_cpc;
+			__fs32   fs_maxbsize;
+			__fs64   fs_sparecon64[17];
+			__fs64   fs_sblockloc;
+			__fs64	cs_ndir;
+			__fs64	cs_nbfree;
+		} fs_u2;
+	} fs_un;
+};
+
+struct ufs_super_block_third {
+	union {
+		struct {
+			__fs16	fs_opostbl[46];
+		} fs_u1;
+		struct {
+			__fs64	cs_nifree;	/* number of free inodes */
+			__fs64	cs_nffree;	/* number of free frags */
+			__fs64   cs_numclusters;	/* number of free clusters */
+			__fs64   cs_spare[3];	/* future expansion */
+			struct  ufs_timeval    fs_time;		/* last time written */
+			__fs64    fs_size;		/* number of blocks in fs */
+			__fs64    fs_dsize;	/* number of data blocks in fs */
+			__fs64   fs_csaddr;	/* blk addr of cyl grp summary area */
+			__fs64    fs_pendingblocks;/* blocks in process of being freed */
+			__fs32    fs_pendinginodes;/*inodes in process of being freed */
+		} __attribute__ ((packed)) fs_u2;
+	} fs_un1;
+	union {
+		struct {
+			__fs32	fs_sparecon[53];/* reserved for future constants */
+			__fs32	fs_reclaim;
+			__fs32	fs_sparecon2[1];
+			__fs32	fs_state;	/* file system state time stamp */
+			__fs32	fs_qbmask[2];	/* ~usb_bmask */
+			__fs32	fs_qfmask[2];	/* ~usb_fmask */
+		} fs_sun;
+		struct {
+			__fs32	fs_sparecon[53];/* reserved for future constants */
+			__fs32	fs_reclaim;
+			__fs32	fs_sparecon2[1];
+			__fs32	fs_npsect;	/* # sectors/track including spares */
+			__fs32	fs_qbmask[2];	/* ~usb_bmask */
+			__fs32	fs_qfmask[2];	/* ~usb_fmask */
+		} fs_sunx86;
+		struct {
+			__fs32	fs_sparecon[50];/* reserved for future constants */
+			__fs32	fs_contigsumsize;/* size of cluster summary array */
+			__fs32	fs_maxsymlinklen;/* max length of an internal symlink */
+			__fs32	fs_inodefmt;	/* format of on-disk inodes */
+			__fs32	fs_maxfilesize[2];	/* max representable file size */
+			__fs32	fs_qbmask[2];	/* ~usb_bmask */
+			__fs32	fs_qfmask[2];	/* ~usb_fmask */
+			__fs32	fs_state;	/* file system state time stamp */
+		} fs_44;
+	} fs_un2;
+	__fs32	fs_postblformat;
+	__fs32	fs_nrpos;
+	__fs32	fs_postbloff;
+	__fs32	fs_rotbloff;
+	__fs32	fs_magic;
+	__u8	fs_space[1];
+};
+
+#endif /* __LINUX_UFS_FS_H */
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 410084dae389..85a7fc9e4a4e 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -8,9 +8,9 @@
  
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <linux/ufs_fs.h>
 #include <linux/buffer_head.h>
 
+#include "ufs_fs.h"
 #include "ufs.h"
 #include "swab.h"
 #include "util.h"
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 2ebf068ba504..5cae9b5960ea 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -338,7 +338,6 @@ unifdef-y += tty.h
 unifdef-y += types.h
 unifdef-y += udf_fs_i.h
 unifdef-y += udp.h
-unifdef-y += ufs_fs.h
 unifdef-y += uinput.h
 unifdef-y += uio.h
 unifdef-y += unistd.h
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
deleted file mode 100644
index 10b854d3561f..000000000000
--- a/include/linux/ufs_fs.h
+++ /dev/null
@@ -1,953 +0,0 @@
-/*
- *  linux/include/linux/ufs_fs.h
- *
- * Copyright (C) 1996
- * Adrian Rodriguez (adrian@franklins-tower.rutgers.edu)
- * Laboratory for Computer Science Research Computing Facility
- * Rutgers, The State University of New Jersey
- *
- * Clean swab support by Fare <fare@tunes.org>
- * just hope no one is using NNUUXXI on __?64 structure elements
- * 64-bit clean thanks to Maciej W. Rozycki <macro@ds2.pg.gda.pl>
- *
- * 4.4BSD (FreeBSD) support added on February 1st 1998 by
- * Niels Kristian Bech Jensen <nkbj@image.dk> partially based
- * on code by Martin von Loewis <martin@mira.isdn.cs.tu-berlin.de>.
- *
- * NeXTstep support added on February 5th 1998 by
- * Niels Kristian Bech Jensen <nkbj@image.dk>.
- *
- * Write support by Daniel Pirkl <daniel.pirkl@email.cz>
- *
- * HP/UX hfs filesystem support added by
- * Martin K. Petersen <mkp@mkp.net>, August 1999
- *
- * UFS2 (of FreeBSD 5.x) support added by
- * Niraj Kumar <niraj17@iitbombay.org>  , Jan 2004
- *
- */
-
-#ifndef __LINUX_UFS_FS_H
-#define __LINUX_UFS_FS_H
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/stat.h>
-#include <linux/fs.h>
-
-#ifndef __KERNEL__
-typedef __u64 __fs64;
-typedef __u32 __fs32;
-typedef __u16 __fs16;
-#else
-#include <asm/div64.h>
-typedef __u64 __bitwise __fs64;
-typedef __u32 __bitwise __fs32;
-typedef __u16 __bitwise __fs16;
-#endif
-
-#define UFS_BBLOCK 0
-#define UFS_BBSIZE 8192
-#define UFS_SBLOCK 8192
-#define UFS_SBSIZE 8192
-
-#define UFS_SECTOR_SIZE 512
-#define UFS_SECTOR_BITS 9
-#define UFS_MAGIC  0x00011954
-#define UFS2_MAGIC 0x19540119
-#define UFS_CIGAM  0x54190100 /* byteswapped MAGIC */
-
-/* Copied from FreeBSD */
-/*
- * Each disk drive contains some number of filesystems.
- * A filesystem consists of a number of cylinder groups.
- * Each cylinder group has inodes and data.
- *
- * A filesystem is described by its super-block, which in turn
- * describes the cylinder groups.  The super-block is critical
- * data and is replicated in each cylinder group to protect against
- * catastrophic loss.  This is done at `newfs' time and the critical
- * super-block data does not change, so the copies need not be
- * referenced further unless disaster strikes.
- *
- * For filesystem fs, the offsets of the various blocks of interest
- * are given in the super block as:
- *      [fs->fs_sblkno]         Super-block
- *      [fs->fs_cblkno]         Cylinder group block
- *      [fs->fs_iblkno]         Inode blocks
- *      [fs->fs_dblkno]         Data blocks
- * The beginning of cylinder group cg in fs, is given by
- * the ``cgbase(fs, cg)'' macro.
- *
- * Depending on the architecture and the media, the superblock may
- * reside in any one of four places. For tiny media where every block
- * counts, it is placed at the very front of the partition. Historically,
- * UFS1 placed it 8K from the front to leave room for the disk label and
- * a small bootstrap. For UFS2 it got moved to 64K from the front to leave
- * room for the disk label and a bigger bootstrap, and for really piggy
- * systems we check at 256K from the front if the first three fail. In
- * all cases the size of the superblock will be SBLOCKSIZE. All values are
- * given in byte-offset form, so they do not imply a sector size. The
- * SBLOCKSEARCH specifies the order in which the locations should be searched.
- */
-#define SBLOCK_FLOPPY        0
-#define SBLOCK_UFS1       8192
-#define SBLOCK_UFS2      65536
-#define SBLOCK_PIGGY    262144
-#define SBLOCKSIZE        8192
-#define SBLOCKSEARCH \
-        { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 }
-
-
-/* HP specific MAGIC values */
-
-#define UFS_MAGIC_LFN   0x00095014 /* fs supports filenames > 14 chars */
-#define UFS_CIGAM_LFN   0x14500900 /* srahc 41 < semanelif stroppus sf */
-
-#define UFS_MAGIC_SEC   0x00612195 /* B1 security fs */
-#define UFS_CIGAM_SEC   0x95216100
-
-#define UFS_MAGIC_FEA   0x00195612 /* fs_featurebits supported */
-#define UFS_CIGAM_FEA   0x12561900
-
-#define UFS_MAGIC_4GB   0x05231994 /* fs > 4 GB && fs_featurebits */
-#define UFS_CIGAM_4GB   0x94192305
-
-/* Seems somebody at HP goofed here. B1 and lfs are both 0x2 !?! */
-#define UFS_FSF_LFN     0x00000001 /* long file names */
-#define UFS_FSF_B1      0x00000002 /* B1 security */
-#define UFS_FSF_LFS     0x00000002 /* large files */
-#define UFS_FSF_LUID    0x00000004 /* large UIDs */
-
-/* End of HP stuff */
-
-
-#define UFS_BSIZE	8192
-#define UFS_MINBSIZE	4096
-#define UFS_FSIZE	1024
-#define UFS_MAXFRAG	(UFS_BSIZE / UFS_FSIZE)
-
-#define UFS_NDADDR 12
-#define UFS_NINDIR 3
-
-#define UFS_IND_BLOCK	(UFS_NDADDR + 0)
-#define UFS_DIND_BLOCK	(UFS_NDADDR + 1)
-#define UFS_TIND_BLOCK	(UFS_NDADDR + 2)
-
-#define UFS_NDIR_FRAGMENT (UFS_NDADDR << uspi->s_fpbshift)
-#define UFS_IND_FRAGMENT (UFS_IND_BLOCK << uspi->s_fpbshift)
-#define UFS_DIND_FRAGMENT (UFS_DIND_BLOCK << uspi->s_fpbshift)
-#define UFS_TIND_FRAGMENT (UFS_TIND_BLOCK << uspi->s_fpbshift)
-
-#define UFS_ROOTINO 2
-#define UFS_FIRST_INO (UFS_ROOTINO + 1)
-
-#define UFS_USEEFT  ((__u16)65535)
-
-#define UFS_FSOK      0x7c269d38
-#define UFS_FSACTIVE  ((__s8)0x00)
-#define UFS_FSCLEAN   ((__s8)0x01)
-#define UFS_FSSTABLE  ((__s8)0x02)
-#define UFS_FSOSF1    ((__s8)0x03)	/* is this correct for DEC OSF/1? */
-#define UFS_FSBAD     ((__s8)0xff)
-
-/* From here to next blank line, s_flags for ufs_sb_info */
-/* directory entry encoding */
-#define UFS_DE_MASK		0x00000010	/* mask for the following */
-#define UFS_DE_OLD		0x00000000
-#define UFS_DE_44BSD		0x00000010
-/* uid encoding */
-#define UFS_UID_MASK		0x00000060	/* mask for the following */
-#define UFS_UID_OLD		0x00000000
-#define UFS_UID_44BSD		0x00000020
-#define UFS_UID_EFT		0x00000040
-/* superblock state encoding */
-#define UFS_ST_MASK		0x00000700	/* mask for the following */
-#define UFS_ST_OLD		0x00000000
-#define UFS_ST_44BSD		0x00000100
-#define UFS_ST_SUN		0x00000200 /* Solaris */
-#define UFS_ST_SUNOS		0x00000300
-#define UFS_ST_SUNx86		0x00000400 /* Solaris x86 */
-/*cylinder group encoding */
-#define UFS_CG_MASK		0x00003000	/* mask for the following */
-#define UFS_CG_OLD		0x00000000
-#define UFS_CG_44BSD		0x00002000
-#define UFS_CG_SUN		0x00001000
-/* filesystem type encoding */
-#define UFS_TYPE_MASK		0x00010000	/* mask for the following */
-#define UFS_TYPE_UFS1		0x00000000
-#define UFS_TYPE_UFS2		0x00010000
-
-
-/* fs_inodefmt options */
-#define UFS_42INODEFMT	-1
-#define UFS_44INODEFMT	2
-
-/*
- * MINFREE gives the minimum acceptable percentage of file system
- * blocks which may be free. If the freelist drops below this level
- * only the superuser may continue to allocate blocks. This may
- * be set to 0 if no reserve of free blocks is deemed necessary,
- * however throughput drops by fifty percent if the file system
- * is run at between 95% and 100% full; thus the minimum default
- * value of fs_minfree is 5%. However, to get good clustering
- * performance, 10% is a better choice. hence we use 10% as our
- * default value. With 10% free space, fragmentation is not a
- * problem, so we choose to optimize for time.
- */
-#define UFS_MINFREE         5
-#define UFS_DEFAULTOPT      UFS_OPTTIME
-            
-/*
- * Turn file system block numbers into disk block addresses.
- * This maps file system blocks to device size blocks.
- */
-#define ufs_fsbtodb(uspi, b)	((b) << (uspi)->s_fsbtodb)
-#define	ufs_dbtofsb(uspi, b)	((b) >> (uspi)->s_fsbtodb)
-
-/*
- * Cylinder group macros to locate things in cylinder groups.
- * They calc file system addresses of cylinder group data structures.
- */
-#define	ufs_cgbase(c)	(uspi->s_fpg * (c))
-#define ufs_cgstart(c)	((uspi)->fs_magic == UFS2_MAGIC ?  ufs_cgbase(c) : \
-	(ufs_cgbase(c)  + uspi->s_cgoffset * ((c) & ~uspi->s_cgmask)))
-#define	ufs_cgsblock(c)	(ufs_cgstart(c) + uspi->s_sblkno)	/* super blk */
-#define	ufs_cgcmin(c)	(ufs_cgstart(c) + uspi->s_cblkno)	/* cg block */
-#define	ufs_cgimin(c)	(ufs_cgstart(c) + uspi->s_iblkno)	/* inode blk */
-#define	ufs_cgdmin(c)	(ufs_cgstart(c) + uspi->s_dblkno)	/* 1st data */
-
-/*
- * Macros for handling inode numbers:
- *     inode number to file system block offset.
- *     inode number to cylinder group number.
- *     inode number to file system block address.
- */
-#define	ufs_inotocg(x)		((x) / uspi->s_ipg)
-#define	ufs_inotocgoff(x)	((x) % uspi->s_ipg)
-#define	ufs_inotofsba(x)	(((u64)ufs_cgimin(ufs_inotocg(x))) + ufs_inotocgoff(x) / uspi->s_inopf)
-#define	ufs_inotofsbo(x)	((x) % uspi->s_inopf)
-
-/*
- * Compute the cylinder and rotational position of a cyl block addr.
- */
-#define ufs_cbtocylno(bno) \
-	((bno) * uspi->s_nspf / uspi->s_spc)
-#define ufs_cbtorpos(bno) \
-	((((bno) * uspi->s_nspf % uspi->s_spc / uspi->s_nsect \
-	* uspi->s_trackskew + (bno) * uspi->s_nspf % uspi->s_spc \
-	% uspi->s_nsect * uspi->s_interleave) % uspi->s_nsect \
-	* uspi->s_nrpos) / uspi->s_npsect)
-
-/*
- * The following macros optimize certain frequently calculated
- * quantities by using shifts and masks in place of divisions
- * modulos and multiplications.
- */
-#define ufs_blkoff(loc)		((loc) & uspi->s_qbmask)
-#define ufs_fragoff(loc)	((loc) & uspi->s_qfmask)
-#define ufs_lblktosize(blk)	((blk) << uspi->s_bshift)
-#define ufs_lblkno(loc)		((loc) >> uspi->s_bshift)
-#define ufs_numfrags(loc)	((loc) >> uspi->s_fshift)
-#define ufs_blkroundup(size)	(((size) + uspi->s_qbmask) & uspi->s_bmask)
-#define ufs_fragroundup(size)	(((size) + uspi->s_qfmask) & uspi->s_fmask)
-#define ufs_fragstoblks(frags)	((frags) >> uspi->s_fpbshift)
-#define ufs_blkstofrags(blks)	((blks) << uspi->s_fpbshift)
-#define ufs_fragnum(fsb)	((fsb) & uspi->s_fpbmask)
-#define ufs_blknum(fsb)		((fsb) & ~uspi->s_fpbmask)
-
-#define	UFS_MAXNAMLEN 255
-#define UFS_MAXMNTLEN 512
-#define UFS2_MAXMNTLEN 468
-#define UFS2_MAXVOLLEN 32
-#define UFS_MAXCSBUFS 31
-#define UFS_LINK_MAX 32000
-/*
-#define	UFS2_NOCSPTRS	((128 / sizeof(void *)) - 4)
-*/
-#define	UFS2_NOCSPTRS	28
-
-/*
- * UFS_DIR_PAD defines the directory entries boundaries
- * (must be a multiple of 4)
- */
-#define UFS_DIR_PAD			4
-#define UFS_DIR_ROUND			(UFS_DIR_PAD - 1)
-#define UFS_DIR_REC_LEN(name_len)	(((name_len) + 1 + 8 + UFS_DIR_ROUND) & ~UFS_DIR_ROUND)
-
-struct ufs_timeval {
-	__fs32	tv_sec;
-	__fs32	tv_usec;
-};
-
-struct ufs_dir_entry {
-	__fs32  d_ino;			/* inode number of this entry */
-	__fs16  d_reclen;		/* length of this entry */
-	union {
-		__fs16	d_namlen;		/* actual length of d_name */
-		struct {
-			__u8	d_type;		/* file type */
-			__u8	d_namlen;	/* length of string in d_name */
-		} d_44;
-	} d_u;
-	__u8	d_name[UFS_MAXNAMLEN + 1];	/* file name */
-};
-
-struct ufs_csum {
-	__fs32	cs_ndir;	/* number of directories */
-	__fs32	cs_nbfree;	/* number of free blocks */
-	__fs32	cs_nifree;	/* number of free inodes */
-	__fs32	cs_nffree;	/* number of free frags */
-};
-struct ufs2_csum_total {
-	__fs64	cs_ndir;	/* number of directories */
-	__fs64	cs_nbfree;	/* number of free blocks */
-	__fs64	cs_nifree;	/* number of free inodes */
-	__fs64	cs_nffree;	/* number of free frags */
-	__fs64   cs_numclusters;	/* number of free clusters */
-	__fs64   cs_spare[3];	/* future expansion */
-};
-
-struct ufs_csum_core {
-	__u64	cs_ndir;	/* number of directories */
-	__u64	cs_nbfree;	/* number of free blocks */
-	__u64	cs_nifree;	/* number of free inodes */
-	__u64	cs_nffree;	/* number of free frags */
-	__u64   cs_numclusters;	/* number of free clusters */
-};
-
-/*
- * File system flags
- */
-#define UFS_UNCLEAN      0x01    /* file system not clean at mount (unused) */
-#define UFS_DOSOFTDEP    0x02    /* file system using soft dependencies */
-#define UFS_NEEDSFSCK    0x04    /* needs sync fsck (FreeBSD compat, unused) */
-#define UFS_INDEXDIRS    0x08    /* kernel supports indexed directories */
-#define UFS_ACLS         0x10    /* file system has ACLs enabled */
-#define UFS_MULTILABEL   0x20    /* file system is MAC multi-label */
-#define UFS_FLAGS_UPDATED 0x80   /* flags have been moved to new location */
-
-#if 0
-/*
- * This is the actual superblock, as it is laid out on the disk.
- * Do NOT use this structure, because of sizeof(ufs_super_block) > 512 and
- * it may occupy several blocks, use
- * struct ufs_super_block_(first,second,third) instead.
- */
-struct ufs_super_block {
-	union {
-		struct {
-			__fs32	fs_link;	/* UNUSED */
-		} fs_42;
-		struct {
-			__fs32	fs_state;	/* file system state flag */
-		} fs_sun;
-	} fs_u0;
-	__fs32	fs_rlink;	/* UNUSED */
-	__fs32	fs_sblkno;	/* addr of super-block in filesys */
-	__fs32	fs_cblkno;	/* offset of cyl-block in filesys */
-	__fs32	fs_iblkno;	/* offset of inode-blocks in filesys */
-	__fs32	fs_dblkno;	/* offset of first data after cg */
-	__fs32	fs_cgoffset;	/* cylinder group offset in cylinder */
-	__fs32	fs_cgmask;	/* used to calc mod fs_ntrak */
-	__fs32	fs_time;	/* last time written -- time_t */
-	__fs32	fs_size;	/* number of blocks in fs */
-	__fs32	fs_dsize;	/* number of data blocks in fs */
-	__fs32	fs_ncg;		/* number of cylinder groups */
-	__fs32	fs_bsize;	/* size of basic blocks in fs */
-	__fs32	fs_fsize;	/* size of frag blocks in fs */
-	__fs32	fs_frag;	/* number of frags in a block in fs */
-/* these are configuration parameters */
-	__fs32	fs_minfree;	/* minimum percentage of free blocks */
-	__fs32	fs_rotdelay;	/* num of ms for optimal next block */
-	__fs32	fs_rps;		/* disk revolutions per second */
-/* these fields can be computed from the others */
-	__fs32	fs_bmask;	/* ``blkoff'' calc of blk offsets */
-	__fs32	fs_fmask;	/* ``fragoff'' calc of frag offsets */
-	__fs32	fs_bshift;	/* ``lblkno'' calc of logical blkno */
-	__fs32	fs_fshift;	/* ``numfrags'' calc number of frags */
-/* these are configuration parameters */
-	__fs32	fs_maxcontig;	/* max number of contiguous blks */
-	__fs32	fs_maxbpg;	/* max number of blks per cyl group */
-/* these fields can be computed from the others */
-	__fs32	fs_fragshift;	/* block to frag shift */
-	__fs32	fs_fsbtodb;	/* fsbtodb and dbtofsb shift constant */
-	__fs32	fs_sbsize;	/* actual size of super block */
-	__fs32	fs_csmask;	/* csum block offset */
-	__fs32	fs_csshift;	/* csum block number */
-	__fs32	fs_nindir;	/* value of NINDIR */
-	__fs32	fs_inopb;	/* value of INOPB */
-	__fs32	fs_nspf;	/* value of NSPF */
-/* yet another configuration parameter */
-	__fs32	fs_optim;	/* optimization preference, see below */
-/* these fields are derived from the hardware */
-	union {
-		struct {
-			__fs32	fs_npsect;	/* # sectors/track including spares */
-		} fs_sun;
-		struct {
-			__fs32	fs_state;	/* file system state time stamp */
-		} fs_sunx86;
-	} fs_u1;
-	__fs32	fs_interleave;	/* hardware sector interleave */
-	__fs32	fs_trackskew;	/* sector 0 skew, per track */
-/* a unique id for this filesystem (currently unused and unmaintained) */
-/* In 4.3 Tahoe this space is used by fs_headswitch and fs_trkseek */
-/* Neither of those fields is used in the Tahoe code right now but */
-/* there could be problems if they are.                            */
-	__fs32	fs_id[2];	/* file system id */
-/* sizes determined by number of cylinder groups and their sizes */
-	__fs32	fs_csaddr;	/* blk addr of cyl grp summary area */
-	__fs32	fs_cssize;	/* size of cyl grp summary area */
-	__fs32	fs_cgsize;	/* cylinder group size */
-/* these fields are derived from the hardware */
-	__fs32	fs_ntrak;	/* tracks per cylinder */
-	__fs32	fs_nsect;	/* sectors per track */
-	__fs32	fs_spc;		/* sectors per cylinder */
-/* this comes from the disk driver partitioning */
-	__fs32	fs_ncyl;	/* cylinders in file system */
-/* these fields can be computed from the others */
-	__fs32	fs_cpg;		/* cylinders per group */
-	__fs32	fs_ipg;		/* inodes per cylinder group */
-	__fs32	fs_fpg;		/* blocks per group * fs_frag */
-/* this data must be re-computed after crashes */
-	struct ufs_csum fs_cstotal;	/* cylinder summary information */
-/* these fields are cleared at mount time */
-	__s8	fs_fmod;	/* super block modified flag */
-	__s8	fs_clean;	/* file system is clean flag */
-	__s8	fs_ronly;	/* mounted read-only flag */
-	__s8	fs_flags;
-	union {
-		struct {
-			__s8	fs_fsmnt[UFS_MAXMNTLEN];/* name mounted on */
-			__fs32	fs_cgrotor;	/* last cg searched */
-			__fs32	fs_csp[UFS_MAXCSBUFS];/*list of fs_cs info buffers */
-			__fs32	fs_maxcluster;
-			__fs32	fs_cpc;		/* cyl per cycle in postbl */
-			__fs16	fs_opostbl[16][8]; /* old rotation block list head */
-		} fs_u1;
-		struct {
-			__s8  fs_fsmnt[UFS2_MAXMNTLEN];	/* name mounted on */
-			__u8   fs_volname[UFS2_MAXVOLLEN]; /* volume name */
-			__fs64  fs_swuid;		/* system-wide uid */
-			__fs32  fs_pad;	/* due to alignment of fs_swuid */
-			__fs32   fs_cgrotor;     /* last cg searched */
-			__fs32   fs_ocsp[UFS2_NOCSPTRS]; /*list of fs_cs info buffers */
-			__fs32   fs_contigdirs;/*# of contiguously allocated dirs */
-			__fs32   fs_csp;	/* cg summary info buffer for fs_cs */
-			__fs32   fs_maxcluster;
-			__fs32   fs_active;/* used by snapshots to track fs */
-			__fs32   fs_old_cpc;	/* cyl per cycle in postbl */
-			__fs32   fs_maxbsize;/*maximum blocking factor permitted */
-			__fs64   fs_sparecon64[17];/*old rotation block list head */
-			__fs64   fs_sblockloc; /* byte offset of standard superblock */
-			struct  ufs2_csum_total fs_cstotal;/*cylinder summary information*/
-			struct  ufs_timeval    fs_time;		/* last time written */
-			__fs64    fs_size;		/* number of blocks in fs */
-			__fs64    fs_dsize;	/* number of data blocks in fs */
-			__fs64   fs_csaddr;	/* blk addr of cyl grp summary area */
-			__fs64    fs_pendingblocks;/* blocks in process of being freed */
-			__fs32    fs_pendinginodes;/*inodes in process of being freed */
-		} fs_u2;
-	}  fs_u11;
-	union {
-		struct {
-			__fs32	fs_sparecon[53];/* reserved for future constants */
-			__fs32	fs_reclaim;
-			__fs32	fs_sparecon2[1];
-			__fs32	fs_state;	/* file system state time stamp */
-			__fs32	fs_qbmask[2];	/* ~usb_bmask */
-			__fs32	fs_qfmask[2];	/* ~usb_fmask */
-		} fs_sun;
-		struct {
-			__fs32	fs_sparecon[53];/* reserved for future constants */
-			__fs32	fs_reclaim;
-			__fs32	fs_sparecon2[1];
-			__fs32	fs_npsect;	/* # sectors/track including spares */
-			__fs32	fs_qbmask[2];	/* ~usb_bmask */
-			__fs32	fs_qfmask[2];	/* ~usb_fmask */
-		} fs_sunx86;
-		struct {
-			__fs32	fs_sparecon[50];/* reserved for future constants */
-			__fs32	fs_contigsumsize;/* size of cluster summary array */
-			__fs32	fs_maxsymlinklen;/* max length of an internal symlink */
-			__fs32	fs_inodefmt;	/* format of on-disk inodes */
-			__fs32	fs_maxfilesize[2];	/* max representable file size */
-			__fs32	fs_qbmask[2];	/* ~usb_bmask */
-			__fs32	fs_qfmask[2];	/* ~usb_fmask */
-			__fs32	fs_state;	/* file system state time stamp */
-		} fs_44;
-	} fs_u2;
-	__fs32	fs_postblformat;	/* format of positional layout tables */
-	__fs32	fs_nrpos;		/* number of rotational positions */
-	__fs32	fs_postbloff;		/* (__s16) rotation block list head */
-	__fs32	fs_rotbloff;		/* (__u8) blocks for each rotation */
-	__fs32	fs_magic;		/* magic number */
-	__u8	fs_space[1];		/* list of blocks for each rotation */
-};
-#endif/*struct ufs_super_block*/
-
-/*
- * Preference for optimization.
- */
-#define UFS_OPTTIME	0	/* minimize allocation time */
-#define UFS_OPTSPACE	1	/* minimize disk fragmentation */
-
-/*
- * Rotational layout table format types
- */
-#define UFS_42POSTBLFMT		-1	/* 4.2BSD rotational table format */
-#define UFS_DYNAMICPOSTBLFMT	1	/* dynamic rotational table format */
-
-/*
- * Convert cylinder group to base address of its global summary info.
- */
-#define fs_cs(indx) s_csp[(indx)]
-
-/*
- * Cylinder group block for a file system.
- *
- * Writable fields in the cylinder group are protected by the associated
- * super block lock fs->fs_lock.
- */
-#define	CG_MAGIC	0x090255
-#define ufs_cg_chkmagic(sb, ucg) \
-	(fs32_to_cpu((sb), (ucg)->cg_magic) == CG_MAGIC)
-/*
- * Macros for access to old cylinder group array structures
- */
-#define ufs_ocg_blktot(sb, ucg)      fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_btot)
-#define ufs_ocg_blks(sb, ucg, cylno) fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_b[cylno])
-#define ufs_ocg_inosused(sb, ucg)    fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_iused)
-#define ufs_ocg_blksfree(sb, ucg)    fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_free)
-#define ufs_ocg_chkmagic(sb, ucg) \
-	(fs32_to_cpu((sb), ((struct ufs_old_cylinder_group *)(ucg))->cg_magic) == CG_MAGIC)
-
-/*
- * size of this structure is 172 B
- */
-struct	ufs_cylinder_group {
-	__fs32	cg_link;		/* linked list of cyl groups */
-	__fs32	cg_magic;		/* magic number */
-	__fs32	cg_time;		/* time last written */
-	__fs32	cg_cgx;			/* we are the cgx'th cylinder group */
-	__fs16	cg_ncyl;		/* number of cyl's this cg */
-	__fs16	cg_niblk;		/* number of inode blocks this cg */
-	__fs32	cg_ndblk;		/* number of data blocks this cg */
-	struct	ufs_csum cg_cs;		/* cylinder summary information */
-	__fs32	cg_rotor;		/* position of last used block */
-	__fs32	cg_frotor;		/* position of last used frag */
-	__fs32	cg_irotor;		/* position of last used inode */
-	__fs32	cg_frsum[UFS_MAXFRAG];	/* counts of available frags */
-	__fs32	cg_btotoff;		/* (__u32) block totals per cylinder */
-	__fs32	cg_boff;		/* (short) free block positions */
-	__fs32	cg_iusedoff;		/* (char) used inode map */
-	__fs32	cg_freeoff;		/* (u_char) free block map */
-	__fs32	cg_nextfreeoff;		/* (u_char) next available space */
-	union {
-		struct {
-			__fs32	cg_clustersumoff;	/* (u_int32) counts of avail clusters */
-			__fs32	cg_clusteroff;		/* (u_int8) free cluster map */
-			__fs32	cg_nclusterblks;	/* number of clusters this cg */
-			__fs32	cg_sparecon[13];	/* reserved for future use */
-		} cg_44;
-		struct {
-			__fs32	cg_clustersumoff;/* (u_int32) counts of avail clusters */
-			__fs32	cg_clusteroff;	/* (u_int8) free cluster map */
-			__fs32	cg_nclusterblks;/* number of clusters this cg */
-			__fs32   cg_niblk; /* number of inode blocks this cg */
-			__fs32   cg_initediblk;	/* last initialized inode */
-			__fs32   cg_sparecon32[3];/* reserved for future use */
-			__fs64   cg_time;	/* time last written */
-			__fs64	cg_sparecon[3];	/* reserved for future use */
-		} cg_u2;
-		__fs32	cg_sparecon[16];	/* reserved for future use */
-	} cg_u;
-	__u8	cg_space[1];		/* space for cylinder group maps */
-/* actually longer */
-};
-
-/* Historic Cylinder group info */
-struct ufs_old_cylinder_group {
-	__fs32	cg_link;		/* linked list of cyl groups */
-	__fs32	cg_rlink;		/* for incore cyl groups     */
-	__fs32	cg_time;		/* time last written */
-	__fs32	cg_cgx;			/* we are the cgx'th cylinder group */
-	__fs16	cg_ncyl;		/* number of cyl's this cg */
-	__fs16	cg_niblk;		/* number of inode blocks this cg */
-	__fs32	cg_ndblk;		/* number of data blocks this cg */
-	struct	ufs_csum cg_cs;		/* cylinder summary information */
-	__fs32	cg_rotor;		/* position of last used block */
-	__fs32	cg_frotor;		/* position of last used frag */
-	__fs32	cg_irotor;		/* position of last used inode */
-	__fs32	cg_frsum[8];		/* counts of available frags */
-	__fs32	cg_btot[32];		/* block totals per cylinder */
-	__fs16	cg_b[32][8];		/* positions of free blocks */
-	__u8	cg_iused[256];		/* used inode map */
-	__fs32	cg_magic;		/* magic number */
-	__u8	cg_free[1];		/* free block map */
-/* actually longer */
-};
-
-/*
- * structure of an on-disk inode
- */
-struct ufs_inode {
-	__fs16	ui_mode;		/*  0x0 */
-	__fs16	ui_nlink;		/*  0x2 */
-	union {
-		struct {
-			__fs16	ui_suid;	/*  0x4 */
-			__fs16	ui_sgid;	/*  0x6 */
-		} oldids;
-		__fs32	ui_inumber;		/*  0x4 lsf: inode number */
-		__fs32	ui_author;		/*  0x4 GNU HURD: author */
-	} ui_u1;
-	__fs64	ui_size;		/*  0x8 */
-	struct ufs_timeval ui_atime;	/* 0x10 access */
-	struct ufs_timeval ui_mtime;	/* 0x18 modification */
-	struct ufs_timeval ui_ctime;	/* 0x20 creation */
-	union {
-		struct {
-			__fs32	ui_db[UFS_NDADDR];/* 0x28 data blocks */
-			__fs32	ui_ib[UFS_NINDIR];/* 0x58 indirect blocks */
-		} ui_addr;
-		__u8	ui_symlink[4*(UFS_NDADDR+UFS_NINDIR)];/* 0x28 fast symlink */
-	} ui_u2;
-	__fs32	ui_flags;		/* 0x64 immutable, append-only... */
-	__fs32	ui_blocks;		/* 0x68 blocks in use */
-	__fs32	ui_gen;			/* 0x6c like ext2 i_version, for NFS support */
-	union {
-		struct {
-			__fs32	ui_shadow;	/* 0x70 shadow inode with security data */
-			__fs32	ui_uid;		/* 0x74 long EFT version of uid */
-			__fs32	ui_gid;		/* 0x78 long EFT version of gid */
-			__fs32	ui_oeftflag;	/* 0x7c reserved */
-		} ui_sun;
-		struct {
-			__fs32	ui_uid;		/* 0x70 File owner */
-			__fs32	ui_gid;		/* 0x74 File group */
-			__fs32	ui_spare[2];	/* 0x78 reserved */
-		} ui_44;
-		struct {
-			__fs32	ui_uid;		/* 0x70 */
-			__fs32	ui_gid;		/* 0x74 */
-			__fs16	ui_modeh;	/* 0x78 mode high bits */
-			__fs16	ui_spare;	/* 0x7A unused */
-			__fs32	ui_trans;	/* 0x7c filesystem translator */
-		} ui_hurd;
-	} ui_u3;
-};
-
-#define UFS_NXADDR  2            /* External addresses in inode. */
-struct ufs2_inode {
-	__fs16     ui_mode;        /*   0: IFMT, permissions; see below. */
-	__fs16     ui_nlink;       /*   2: File link count. */
-	__fs32     ui_uid;         /*   4: File owner. */
-	__fs32     ui_gid;         /*   8: File group. */
-	__fs32     ui_blksize;     /*  12: Inode blocksize. */
-	__fs64     ui_size;        /*  16: File byte count. */
-	__fs64     ui_blocks;      /*  24: Bytes actually held. */
-	__fs64   ui_atime;       /*  32: Last access time. */
-	__fs64   ui_mtime;       /*  40: Last modified time. */
-	__fs64   ui_ctime;       /*  48: Last inode change time. */
-	__fs64   ui_birthtime;   /*  56: Inode creation time. */
-	__fs32     ui_mtimensec;   /*  64: Last modified time. */
-	__fs32     ui_atimensec;   /*  68: Last access time. */
-	__fs32     ui_ctimensec;   /*  72: Last inode change time. */
-	__fs32     ui_birthnsec;   /*  76: Inode creation time. */
-	__fs32     ui_gen;         /*  80: Generation number. */
-	__fs32     ui_kernflags;   /*  84: Kernel flags. */
-	__fs32     ui_flags;       /*  88: Status flags (chflags). */
-	__fs32     ui_extsize;     /*  92: External attributes block. */
-	__fs64     ui_extb[UFS_NXADDR];/*  96: External attributes block. */
-	union {
-		struct {
-			__fs64     ui_db[UFS_NDADDR]; /* 112: Direct disk blocks. */
-			__fs64     ui_ib[UFS_NINDIR];/* 208: Indirect disk blocks.*/
-		} ui_addr;
-	__u8	ui_symlink[2*4*(UFS_NDADDR+UFS_NINDIR)];/* 0x28 fast symlink */
-	} ui_u2;
-	__fs64     ui_spare[3];    /* 232: Reserved; currently unused */
-};
-
-
-/* FreeBSD has these in sys/stat.h */
-/* ui_flags that can be set by a file owner */
-#define UFS_UF_SETTABLE   0x0000ffff
-#define UFS_UF_NODUMP     0x00000001  /* do not dump */
-#define UFS_UF_IMMUTABLE  0x00000002  /* immutable (can't "change") */
-#define UFS_UF_APPEND     0x00000004  /* append-only */
-#define UFS_UF_OPAQUE     0x00000008  /* directory is opaque (unionfs) */
-#define UFS_UF_NOUNLINK   0x00000010  /* can't be removed or renamed */
-/* ui_flags that only root can set */
-#define UFS_SF_SETTABLE   0xffff0000
-#define UFS_SF_ARCHIVED   0x00010000  /* archived */
-#define UFS_SF_IMMUTABLE  0x00020000  /* immutable (can't "change") */
-#define UFS_SF_APPEND     0x00040000  /* append-only */
-#define UFS_SF_NOUNLINK   0x00100000  /* can't be removed or renamed */
-
-/*
- * This structure is used for reading disk structures larger
- * than the size of fragment.
- */
-struct ufs_buffer_head {
-	__u64 fragment;			/* first fragment */
-	__u64 count;				/* number of fragments */
-	struct buffer_head * bh[UFS_MAXFRAG];	/* buffers */
-};
-
-struct ufs_cg_private_info {
-	struct ufs_buffer_head c_ubh;
-	__u32	c_cgx;		/* number of cylidner group */
-	__u16	c_ncyl;		/* number of cyl's this cg */
-	__u16	c_niblk;	/* number of inode blocks this cg */
-	__u32	c_ndblk;	/* number of data blocks this cg */
-	__u32	c_rotor;	/* position of last used block */
-	__u32	c_frotor;	/* position of last used frag */
-	__u32	c_irotor;	/* position of last used inode */
-	__u32	c_btotoff;	/* (__u32) block totals per cylinder */
-	__u32	c_boff;		/* (short) free block positions */
-	__u32	c_iusedoff;	/* (char) used inode map */
-	__u32	c_freeoff;	/* (u_char) free block map */
-	__u32	c_nextfreeoff;	/* (u_char) next available space */
-	__u32	c_clustersumoff;/* (u_int32) counts of avail clusters */
-	__u32	c_clusteroff;	/* (u_int8) free cluster map */
-	__u32	c_nclusterblks;	/* number of clusters this cg */
-};	
-
-
-struct ufs_sb_private_info {
-	struct ufs_buffer_head s_ubh; /* buffer containing super block */
-	struct ufs_csum_core cs_total;
-	__u32	s_sblkno;	/* offset of super-blocks in filesys */
-	__u32	s_cblkno;	/* offset of cg-block in filesys */
-	__u32	s_iblkno;	/* offset of inode-blocks in filesys */
-	__u32	s_dblkno;	/* offset of first data after cg */
-	__u32	s_cgoffset;	/* cylinder group offset in cylinder */
-	__u32	s_cgmask;	/* used to calc mod fs_ntrak */
-	__u32	s_size;		/* number of blocks (fragments) in fs */
-	__u32	s_dsize;	/* number of data blocks in fs */
-	__u64	s_u2_size;	/* ufs2: number of blocks (fragments) in fs */
-	__u64	s_u2_dsize;	/*ufs2:  number of data blocks in fs */
-	__u32	s_ncg;		/* number of cylinder groups */
-	__u32	s_bsize;	/* size of basic blocks */
-	__u32	s_fsize;	/* size of fragments */
-	__u32	s_fpb;		/* fragments per block */
-	__u32	s_minfree;	/* minimum percentage of free blocks */
-	__u32	s_bmask;	/* `blkoff'' calc of blk offsets */
-	__u32	s_fmask;	/* s_fsize mask */
-	__u32	s_bshift;	/* `lblkno'' calc of logical blkno */
-	__u32   s_fshift;	/* s_fsize shift */
-	__u32	s_fpbshift;	/* fragments per block shift */
-	__u32	s_fsbtodb;	/* fsbtodb and dbtofsb shift constant */
-	__u32	s_sbsize;	/* actual size of super block */
-	__u32   s_csmask;	/* csum block offset */
-	__u32	s_csshift;	/* csum block number */
-	__u32	s_nindir;	/* value of NINDIR */
-	__u32	s_inopb;	/* value of INOPB */
-	__u32	s_nspf;		/* value of NSPF */
-	__u32	s_npsect;	/* # sectors/track including spares */
-	__u32	s_interleave;	/* hardware sector interleave */
-	__u32	s_trackskew;	/* sector 0 skew, per track */
-	__u64	s_csaddr;	/* blk addr of cyl grp summary area */
-	__u32	s_cssize;	/* size of cyl grp summary area */
-	__u32	s_cgsize;	/* cylinder group size */
-	__u32	s_ntrak;	/* tracks per cylinder */
-	__u32	s_nsect;	/* sectors per track */
-	__u32	s_spc;		/* sectors per cylinder */
-	__u32	s_ipg;		/* inodes per cylinder group */
-	__u32	s_fpg;		/* fragments per group */
-	__u32	s_cpc;		/* cyl per cycle in postbl */
-	__s32	s_contigsumsize;/* size of cluster summary array, 44bsd */
-	__s64	s_qbmask;	/* ~usb_bmask */
-	__s64	s_qfmask;	/* ~usb_fmask */
-	__s32	s_postblformat;	/* format of positional layout tables */
-	__s32	s_nrpos;	/* number of rotational positions */
-        __s32	s_postbloff;	/* (__s16) rotation block list head */
-	__s32	s_rotbloff;	/* (__u8) blocks for each rotation */
-
-	__u32	s_fpbmask;	/* fragments per block mask */
-	__u32	s_apb;		/* address per block */
-	__u32	s_2apb;		/* address per block^2 */
-	__u32	s_3apb;		/* address per block^3 */
-	__u32	s_apbmask;	/* address per block mask */
-	__u32	s_apbshift;	/* address per block shift */
-	__u32	s_2apbshift;	/* address per block shift * 2 */
-	__u32	s_3apbshift;	/* address per block shift * 3 */
-	__u32	s_nspfshift;	/* number of sector per fragment shift */
-	__u32	s_nspb;		/* number of sector per block */
-	__u32	s_inopf;	/* inodes per fragment */
-	__u32	s_sbbase;	/* offset of NeXTstep superblock */
-	__u32	s_bpf;		/* bits per fragment */
-	__u32	s_bpfshift;	/* bits per fragment shift*/
-	__u32	s_bpfmask;	/* bits per fragment mask */
-
-	__u32	s_maxsymlinklen;/* upper limit on fast symlinks' size */
-	__s32	fs_magic;       /* filesystem magic */
-	unsigned int s_dirblksize;
-};
-
-/*
- * Sizes of this structures are:
- *	ufs_super_block_first	512
- *	ufs_super_block_second	512
- *	ufs_super_block_third	356
- */
-struct ufs_super_block_first {
-	union {
-		struct {
-			__fs32	fs_link;	/* UNUSED */
-		} fs_42;
-		struct {
-			__fs32	fs_state;	/* file system state flag */
-		} fs_sun;
-	} fs_u0;
-	__fs32	fs_rlink;
-	__fs32	fs_sblkno;
-	__fs32	fs_cblkno;
-	__fs32	fs_iblkno;
-	__fs32	fs_dblkno;
-	__fs32	fs_cgoffset;
-	__fs32	fs_cgmask;
-	__fs32	fs_time;
-	__fs32	fs_size;
-	__fs32	fs_dsize;
-	__fs32	fs_ncg;
-	__fs32	fs_bsize;
-	__fs32	fs_fsize;
-	__fs32	fs_frag;
-	__fs32	fs_minfree;
-	__fs32	fs_rotdelay;
-	__fs32	fs_rps;
-	__fs32	fs_bmask;
-	__fs32	fs_fmask;
-	__fs32	fs_bshift;
-	__fs32	fs_fshift;
-	__fs32	fs_maxcontig;
-	__fs32	fs_maxbpg;
-	__fs32	fs_fragshift;
-	__fs32	fs_fsbtodb;
-	__fs32	fs_sbsize;
-	__fs32	fs_csmask;
-	__fs32	fs_csshift;
-	__fs32	fs_nindir;
-	__fs32	fs_inopb;
-	__fs32	fs_nspf;
-	__fs32	fs_optim;
-	union {
-		struct {
-			__fs32	fs_npsect;
-		} fs_sun;
-		struct {
-			__fs32	fs_state;
-		} fs_sunx86;
-	} fs_u1;
-	__fs32	fs_interleave;
-	__fs32	fs_trackskew;
-	__fs32	fs_id[2];
-	__fs32	fs_csaddr;
-	__fs32	fs_cssize;
-	__fs32	fs_cgsize;
-	__fs32	fs_ntrak;
-	__fs32	fs_nsect;
-	__fs32	fs_spc;
-	__fs32	fs_ncyl;
-	__fs32	fs_cpg;
-	__fs32	fs_ipg;
-	__fs32	fs_fpg;
-	struct ufs_csum fs_cstotal;
-	__s8	fs_fmod;
-	__s8	fs_clean;
-	__s8	fs_ronly;
-	__s8	fs_flags;
-	__s8	fs_fsmnt[UFS_MAXMNTLEN - 212];
-
-};
-
-struct ufs_super_block_second {
-	union {
-		struct {
-			__s8	fs_fsmnt[212];
-			__fs32	fs_cgrotor;
-			__fs32	fs_csp[UFS_MAXCSBUFS];
-			__fs32	fs_maxcluster;
-			__fs32	fs_cpc;
-			__fs16	fs_opostbl[82];
-		} fs_u1;
-		struct {
-			__s8  fs_fsmnt[UFS2_MAXMNTLEN - UFS_MAXMNTLEN + 212];
-			__u8   fs_volname[UFS2_MAXVOLLEN];
-			__fs64  fs_swuid;
-			__fs32  fs_pad;
-			__fs32   fs_cgrotor;
-			__fs32   fs_ocsp[UFS2_NOCSPTRS];
-			__fs32   fs_contigdirs;
-			__fs32   fs_csp;
-			__fs32   fs_maxcluster;
-			__fs32   fs_active;
-			__fs32   fs_old_cpc;
-			__fs32   fs_maxbsize;
-			__fs64   fs_sparecon64[17];
-			__fs64   fs_sblockloc;
-			__fs64	cs_ndir;
-			__fs64	cs_nbfree;
-		} fs_u2;
-	} fs_un;
-};
-
-struct ufs_super_block_third {
-	union {
-		struct {
-			__fs16	fs_opostbl[46];
-		} fs_u1;
-		struct {
-			__fs64	cs_nifree;	/* number of free inodes */
-			__fs64	cs_nffree;	/* number of free frags */
-			__fs64   cs_numclusters;	/* number of free clusters */
-			__fs64   cs_spare[3];	/* future expansion */
-			struct  ufs_timeval    fs_time;		/* last time written */
-			__fs64    fs_size;		/* number of blocks in fs */
-			__fs64    fs_dsize;	/* number of data blocks in fs */
-			__fs64   fs_csaddr;	/* blk addr of cyl grp summary area */
-			__fs64    fs_pendingblocks;/* blocks in process of being freed */
-			__fs32    fs_pendinginodes;/*inodes in process of being freed */
-		} __attribute__ ((packed)) fs_u2;
-	} fs_un1;
-	union {
-		struct {
-			__fs32	fs_sparecon[53];/* reserved for future constants */
-			__fs32	fs_reclaim;
-			__fs32	fs_sparecon2[1];
-			__fs32	fs_state;	/* file system state time stamp */
-			__fs32	fs_qbmask[2];	/* ~usb_bmask */
-			__fs32	fs_qfmask[2];	/* ~usb_fmask */
-		} fs_sun;
-		struct {
-			__fs32	fs_sparecon[53];/* reserved for future constants */
-			__fs32	fs_reclaim;
-			__fs32	fs_sparecon2[1];
-			__fs32	fs_npsect;	/* # sectors/track including spares */
-			__fs32	fs_qbmask[2];	/* ~usb_bmask */
-			__fs32	fs_qfmask[2];	/* ~usb_fmask */
-		} fs_sunx86;
-		struct {
-			__fs32	fs_sparecon[50];/* reserved for future constants */
-			__fs32	fs_contigsumsize;/* size of cluster summary array */
-			__fs32	fs_maxsymlinklen;/* max length of an internal symlink */
-			__fs32	fs_inodefmt;	/* format of on-disk inodes */
-			__fs32	fs_maxfilesize[2];	/* max representable file size */
-			__fs32	fs_qbmask[2];	/* ~usb_bmask */
-			__fs32	fs_qfmask[2];	/* ~usb_fmask */
-			__fs32	fs_state;	/* file system state time stamp */
-		} fs_44;
-	} fs_un2;
-	__fs32	fs_postblformat;
-	__fs32	fs_nrpos;
-	__fs32	fs_postbloff;
-	__fs32	fs_rotbloff;
-	__fs32	fs_magic;
-	__u8	fs_space[1];
-};
-
-#endif /* __LINUX_UFS_FS_H */
-- 
cgit 


From b3b304a23a8f7ae4c40c7b512ee45afae0010a70 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 8 Feb 2008 04:21:35 -0800
Subject: mount options: add generic_show_options()

Add a new s_options field to struct super_block.  Filesystems can save
mount options passed to them in mount or remount.  It is automatically
freed when the superblock is destroyed.

A new helper function, generic_show_options() is introduced, which uses
this field to display the mount options in /proc/mounts.

Another helper function, save_mount_options() may be used by
filesystems to save the options in the super block.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c     | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
 fs/super.c         |  1 +
 include/linux/fs.h |  9 +++++++++
 3 files changed, 54 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index e9c10cd01e13..db51ddc9b671 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -320,6 +320,50 @@ void mnt_unpin(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(mnt_unpin);
 
+static inline void mangle(struct seq_file *m, const char *s)
+{
+	seq_escape(m, s, " \t\n\\");
+}
+
+/*
+ * Simple .show_options callback for filesystems which don't want to
+ * implement more complex mount option showing.
+ *
+ * See also save_mount_options().
+ */
+int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	const char *options = mnt->mnt_sb->s_options;
+
+	if (options != NULL && options[0]) {
+		seq_putc(m, ',');
+		mangle(m, options);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(generic_show_options);
+
+/*
+ * If filesystem uses generic_show_options(), this function should be
+ * called from the fill_super() callback.
+ *
+ * The .remount_fs callback usually needs to be handled in a special
+ * way, to make sure, that previous options are not overwritten if the
+ * remount fails.
+ *
+ * Also note, that if the filesystem's .remount_fs function doesn't
+ * reset all options to their default value, but changes only newly
+ * given options, then the displayed options will not reflect reality
+ * any more.
+ */
+void save_mount_options(struct super_block *sb, char *options)
+{
+	kfree(sb->s_options);
+	sb->s_options = kstrdup(options, GFP_KERNEL);
+}
+EXPORT_SYMBOL(save_mount_options);
+
 /* iterator */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
@@ -341,11 +385,6 @@ static void m_stop(struct seq_file *m, void *v)
 	up_read(&namespace_sem);
 }
 
-static inline void mangle(struct seq_file *m, const char *s)
-{
-	seq_escape(m, s, " \t\n\\");
-}
-
 static int show_vfsmnt(struct seq_file *m, void *v)
 {
 	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
diff --git a/fs/super.c b/fs/super.c
index ceaf2e3d594c..65f6849847f4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -105,6 +105,7 @@ static inline void destroy_super(struct super_block *s)
 {
 	security_sb_free(s);
 	kfree(s->s_subtype);
+	kfree(s->s_options);
 	kfree(s);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3db22fc2249a..cb3a9001f3b9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1038,6 +1038,12 @@ struct super_block {
 	 * in /proc/mounts will be "type.subtype"
 	 */
 	char *s_subtype;
+
+	/*
+	 * Saved mount options for lazy filesystems using
+	 * generic_show_options()
+	 */
+	char *s_options;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
@@ -1970,6 +1976,9 @@ extern int __must_check inode_setattr(struct inode *, struct iattr *);
 
 extern void file_update_time(struct file *file);
 
+extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
+extern void save_mount_options(struct super_block *sb, char *options);
+
 static inline ino_t parent_ino(struct dentry *dentry)
 {
 	ino_t res;
-- 
cgit 


From 680d794babebc74484c141448baa9b95b211cf5e Mon Sep 17 00:00:00 2001
From: "akpm@linux-foundation.org" <akpm@linux-foundation.org>
Date: Fri, 8 Feb 2008 04:21:48 -0800
Subject: mount options: fix tmpfs

Add .show_options super operation to tmpfs.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h |   5 +-
 mm/shmem.c               | 196 +++++++++++++++++++++++++++++++----------------
 2 files changed, 136 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index f3c51899117f..8d5fb36ea047 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -30,9 +30,12 @@ struct shmem_sb_info {
 	unsigned long free_blocks;  /* How many are left for allocation */
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
+	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
+	uid_t uid;		    /* Mount uid for root directory */
+	gid_t gid;		    /* Mount gid for root directory */
+	mode_t mode;		    /* Mount mode for root directory */
 	int policy;		    /* Default NUMA memory alloc policy */
 	nodemask_t policy_nodes;    /* nodemask for preferred and bind */
-	spinlock_t    stat_lock;
 };
 
 static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
diff --git a/mm/shmem.c b/mm/shmem.c
index 85bed948fafc..2f961a6dbf57 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -49,6 +49,7 @@
 #include <linux/ctype.h>
 #include <linux/migrate.h>
 #include <linux/highmem.h>
+#include <linux/seq_file.h>
 
 #include <asm/uaccess.h>
 #include <asm/div64.h>
@@ -84,6 +85,16 @@ enum sgp_type {
 	SGP_WRITE,	/* may exceed i_size, may allocate page */
 };
 
+static unsigned long shmem_default_max_blocks(void)
+{
+	return totalram_pages / 2;
+}
+
+static unsigned long shmem_default_max_inodes(void)
+{
+	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
+}
+
 static int shmem_getpage(struct inode *inode, unsigned long idx,
 			 struct page **pagep, enum sgp_type sgp, int *type);
 
@@ -1068,7 +1079,8 @@ redirty:
 }
 
 #ifdef CONFIG_NUMA
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+#ifdef CONFIG_TMPFS
+static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
 {
 	char *nodelist = strchr(value, ':');
 	int err = 1;
@@ -1117,6 +1129,42 @@ out:
 	return err;
 }
 
+static void shmem_show_mpol(struct seq_file *seq, int policy,
+			    const nodemask_t policy_nodes)
+{
+	char *policy_string;
+
+	switch (policy) {
+	case MPOL_PREFERRED:
+		policy_string = "prefer";
+		break;
+	case MPOL_BIND:
+		policy_string = "bind";
+		break;
+	case MPOL_INTERLEAVE:
+		policy_string = "interleave";
+		break;
+	default:
+		/* MPOL_DEFAULT */
+		return;
+	}
+
+	seq_printf(seq, ",mpol=%s", policy_string);
+
+	if (policy != MPOL_INTERLEAVE ||
+	    !nodes_equal(policy_nodes, node_states[N_HIGH_MEMORY])) {
+		char buffer[64];
+		int len;
+
+		len = nodelist_scnprintf(buffer, sizeof(buffer), policy_nodes);
+		if (len < sizeof(buffer))
+			seq_printf(seq, ":%s", buffer);
+		else
+			seq_printf(seq, ":?");
+	}
+}
+#endif /* CONFIG_TMPFS */
+
 static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
 			struct shmem_inode_info *info, unsigned long idx)
 {
@@ -1148,13 +1196,20 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 	mpol_free(pvma.vm_policy);
 	return page;
 }
-#else
+#else /* !CONFIG_NUMA */
+#ifdef CONFIG_TMPFS
 static inline int shmem_parse_mpol(char *value, int *policy,
 						nodemask_t *policy_nodes)
 {
 	return 1;
 }
 
+static inline void shmem_show_mpol(struct seq_file *seq, int policy,
+			    const nodemask_t policy_nodes)
+{
+}
+#endif /* CONFIG_TMPFS */
+
 static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
 			struct shmem_inode_info *info, unsigned long idx)
 {
@@ -1166,7 +1221,7 @@ static inline struct page *shmem_alloc_page(gfp_t gfp,
 {
 	return alloc_page(gfp);
 }
-#endif
+#endif /* CONFIG_NUMA */
 
 /*
  * shmem_getpage - either get the page from swap or allocate a new one
@@ -2077,9 +2132,8 @@ static const struct export_operations shmem_export_ops = {
 	.fh_to_dentry	= shmem_fh_to_dentry,
 };
 
-static int shmem_parse_options(char *options, int *mode, uid_t *uid,
-	gid_t *gid, unsigned long *blocks, unsigned long *inodes,
-	int *policy, nodemask_t *policy_nodes)
+static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
+			       bool remount)
 {
 	char *this_char, *value, *rest;
 
@@ -2122,35 +2176,37 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid,
 			}
 			if (*rest)
 				goto bad_val;
-			*blocks = DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
+			sbinfo->max_blocks =
+				DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
 		} else if (!strcmp(this_char,"nr_blocks")) {
-			*blocks = memparse(value,&rest);
+			sbinfo->max_blocks = memparse(value, &rest);
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"nr_inodes")) {
-			*inodes = memparse(value,&rest);
+			sbinfo->max_inodes = memparse(value, &rest);
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"mode")) {
-			if (!mode)
+			if (remount)
 				continue;
-			*mode = simple_strtoul(value,&rest,8);
+			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"uid")) {
-			if (!uid)
+			if (remount)
 				continue;
-			*uid = simple_strtoul(value,&rest,0);
+			sbinfo->uid = simple_strtoul(value, &rest, 0);
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"gid")) {
-			if (!gid)
+			if (remount)
 				continue;
-			*gid = simple_strtoul(value,&rest,0);
+			sbinfo->gid = simple_strtoul(value, &rest, 0);
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"mpol")) {
-			if (shmem_parse_mpol(value,policy,policy_nodes))
+			if (shmem_parse_mpol(value, &sbinfo->policy,
+					     &sbinfo->policy_nodes))
 				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
@@ -2170,24 +2226,20 @@ bad_val:
 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
-	unsigned long max_blocks = sbinfo->max_blocks;
-	unsigned long max_inodes = sbinfo->max_inodes;
-	int policy = sbinfo->policy;
-	nodemask_t policy_nodes = sbinfo->policy_nodes;
+	struct shmem_sb_info config = *sbinfo;
 	unsigned long blocks;
 	unsigned long inodes;
 	int error = -EINVAL;
 
-	if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks,
-				&max_inodes, &policy, &policy_nodes))
+	if (shmem_parse_options(data, &config, true))
 		return error;
 
 	spin_lock(&sbinfo->stat_lock);
 	blocks = sbinfo->max_blocks - sbinfo->free_blocks;
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
-	if (max_blocks < blocks)
+	if (config.max_blocks < blocks)
 		goto out;
-	if (max_inodes < inodes)
+	if (config.max_inodes < inodes)
 		goto out;
 	/*
 	 * Those tests also disallow limited->unlimited while any are in
@@ -2195,23 +2247,42 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 	 * but we must separately disallow unlimited->limited, because
 	 * in that case we have no record of how much is already in use.
 	 */
-	if (max_blocks && !sbinfo->max_blocks)
+	if (config.max_blocks && !sbinfo->max_blocks)
 		goto out;
-	if (max_inodes && !sbinfo->max_inodes)
+	if (config.max_inodes && !sbinfo->max_inodes)
 		goto out;
 
 	error = 0;
-	sbinfo->max_blocks  = max_blocks;
-	sbinfo->free_blocks = max_blocks - blocks;
-	sbinfo->max_inodes  = max_inodes;
-	sbinfo->free_inodes = max_inodes - inodes;
-	sbinfo->policy = policy;
-	sbinfo->policy_nodes = policy_nodes;
+	sbinfo->max_blocks  = config.max_blocks;
+	sbinfo->free_blocks = config.max_blocks - blocks;
+	sbinfo->max_inodes  = config.max_inodes;
+	sbinfo->free_inodes = config.max_inodes - inodes;
+	sbinfo->policy      = config.policy;
+	sbinfo->policy_nodes = config.policy_nodes;
 out:
 	spin_unlock(&sbinfo->stat_lock);
 	return error;
 }
-#endif
+
+static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct shmem_sb_info *sbinfo = SHMEM_SB(vfs->mnt_sb);
+
+	if (sbinfo->max_blocks != shmem_default_max_blocks())
+		seq_printf(seq, ",size=%luk",
+			sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
+	if (sbinfo->max_inodes != shmem_default_max_inodes())
+		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
+	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
+		seq_printf(seq, ",mode=%03o", sbinfo->mode);
+	if (sbinfo->uid != 0)
+		seq_printf(seq, ",uid=%u", sbinfo->uid);
+	if (sbinfo->gid != 0)
+		seq_printf(seq, ",gid=%u", sbinfo->gid);
+	shmem_show_mpol(seq, sbinfo->policy, sbinfo->policy_nodes);
+	return 0;
+}
+#endif /* CONFIG_TMPFS */
 
 static void shmem_put_super(struct super_block *sb)
 {
@@ -2224,15 +2295,23 @@ static int shmem_fill_super(struct super_block *sb,
 {
 	struct inode *inode;
 	struct dentry *root;
-	int mode   = S_IRWXUGO | S_ISVTX;
-	uid_t uid = current->fsuid;
-	gid_t gid = current->fsgid;
-	int err = -ENOMEM;
 	struct shmem_sb_info *sbinfo;
-	unsigned long blocks = 0;
-	unsigned long inodes = 0;
-	int policy = MPOL_DEFAULT;
-	nodemask_t policy_nodes = node_states[N_HIGH_MEMORY];
+	int err = -ENOMEM;
+
+	/* Round up to L1_CACHE_BYTES to resist false sharing */
+	sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info),
+				L1_CACHE_BYTES), GFP_KERNEL);
+	if (!sbinfo)
+		return -ENOMEM;
+
+	sbinfo->max_blocks = 0;
+	sbinfo->max_inodes = 0;
+	sbinfo->mode = S_IRWXUGO | S_ISVTX;
+	sbinfo->uid = current->fsuid;
+	sbinfo->gid = current->fsgid;
+	sbinfo->policy = MPOL_DEFAULT;
+	sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
+	sb->s_fs_info = sbinfo;
 
 #ifdef CONFIG_TMPFS
 	/*
@@ -2241,34 +2320,22 @@ static int shmem_fill_super(struct super_block *sb,
 	 * but the internal instance is left unlimited.
 	 */
 	if (!(sb->s_flags & MS_NOUSER)) {
-		blocks = totalram_pages / 2;
-		inodes = totalram_pages - totalhigh_pages;
-		if (inodes > blocks)
-			inodes = blocks;
-		if (shmem_parse_options(data, &mode, &uid, &gid, &blocks,
-					&inodes, &policy, &policy_nodes))
-			return -EINVAL;
+		sbinfo->max_blocks = shmem_default_max_blocks();
+		sbinfo->max_inodes = shmem_default_max_inodes();
+		if (shmem_parse_options(data, sbinfo, false)) {
+			err = -EINVAL;
+			goto failed;
+		}
 	}
 	sb->s_export_op = &shmem_export_ops;
 #else
 	sb->s_flags |= MS_NOUSER;
 #endif
 
-	/* Round up to L1_CACHE_BYTES to resist false sharing */
-	sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info),
-				L1_CACHE_BYTES), GFP_KERNEL);
-	if (!sbinfo)
-		return -ENOMEM;
-
 	spin_lock_init(&sbinfo->stat_lock);
-	sbinfo->max_blocks = blocks;
-	sbinfo->free_blocks = blocks;
-	sbinfo->max_inodes = inodes;
-	sbinfo->free_inodes = inodes;
-	sbinfo->policy = policy;
-	sbinfo->policy_nodes = policy_nodes;
+	sbinfo->free_blocks = sbinfo->max_blocks;
+	sbinfo->free_inodes = sbinfo->max_inodes;
 
-	sb->s_fs_info = sbinfo;
 	sb->s_maxbytes = SHMEM_MAX_BYTES;
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -2280,11 +2347,11 @@ static int shmem_fill_super(struct super_block *sb,
 	sb->s_flags |= MS_POSIXACL;
 #endif
 
-	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
+	inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0);
 	if (!inode)
 		goto failed;
-	inode->i_uid = uid;
-	inode->i_gid = gid;
+	inode->i_uid = sbinfo->uid;
+	inode->i_gid = sbinfo->gid;
 	root = d_alloc_root(inode);
 	if (!root)
 		goto failed_iput;
@@ -2420,6 +2487,7 @@ static const struct super_operations shmem_ops = {
 #ifdef CONFIG_TMPFS
 	.statfs		= shmem_statfs,
 	.remount_fs	= shmem_remount_fs,
+	.show_options	= shmem_show_options,
 #endif
 	.delete_inode	= shmem_delete_inode,
 	.drop_inode	= generic_delete_inode,
-- 
cgit 


From 20a8143eaa3300a58326156eaf43e03db0fd2cb6 Mon Sep 17 00:00:00 2001
From: Paul Clements <paul.clements@steeleye.com>
Date: Fri, 8 Feb 2008 04:21:51 -0800
Subject: NBD: remove limit on max number of nbd devices

Remove the arbitrary 128 device limit for NBD.  nbds_max can now be set to
any number.  In certain scenarios where devices are used sparsely we have
run into the 128 device limit.

Signed-off-by: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/nbd.c | 10 ++++------
 include/linux/nbd.h |  1 -
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ae3106045ee5..018753c59b8e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -54,7 +54,7 @@ static unsigned int debugflags;
 #endif /* NDEBUG */
 
 static unsigned int nbds_max = 16;
-static struct nbd_device nbd_dev[MAX_NBD];
+static struct nbd_device *nbd_dev;
 
 /*
  * Use just one lock (or at most 1 per NIC). Two arguments for this:
@@ -649,11 +649,9 @@ static int __init nbd_init(void)
 
 	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
 
-	if (nbds_max > MAX_NBD) {
-		printk(KERN_CRIT "nbd: cannot allocate more than %u nbds; %u requested.\n", MAX_NBD,
-				nbds_max);
-		return -EINVAL;
-	}
+	nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
+	if (!nbd_dev)
+		return -ENOMEM;
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = alloc_disk(1);
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index cc2b47240a8f..986572081e19 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -35,7 +35,6 @@ enum {
 };
 
 #define nbd_cmd(req) ((req)->cmd[0])
-#define MAX_NBD 128
 
 /* userspace doesn't need the nbd_device structure */
 #ifdef __KERNEL__
-- 
cgit 


From 13d8bcd263cf96c67bd4071ad13cd056dca7b0fb Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 8 Feb 2008 04:21:54 -0800
Subject: use __u32 in linux/reiserfs_fs.h

Since this header is exported to userspace and all the other types in the
header have been scrubbed, this brings the last straggler in line.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/reiserfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 422eab4958a6..8e7eff2cd0ab 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -287,7 +287,7 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
 
 /* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
  * which overflows on large file systems. */
-static inline u32 reiserfs_bmap_count(struct super_block *sb)
+static inline __u32 reiserfs_bmap_count(struct super_block *sb)
 {
 	return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
 }
-- 
cgit 


From 06b2a76d25d3cfbd14680021c1d356c91be6904e Mon Sep 17 00:00:00 2001
From: Yi Yang <yi.y.yang@intel.com>
Date: Fri, 8 Feb 2008 04:21:57 -0800
Subject: Add new string functions strict_strto* and convert kernel params to
 use them

Currently, for every sysfs node, the callers will be responsible for
implementing store operation, so many many callers are doing duplicate
things to validate input, they have the same mistakes because they are
calling simple_strtol/ul/ll/uul, especially for module params, they are
just numeric, but you can echo such values as 0x1234xxx, 07777888 and
1234aaa, for these cases, module params store operation just ignores
succesive invalid char and converts prefix part to a numeric although input
is acctually invalid.

This patch tries to fix the aforementioned issues and implements
strict_strtox serial functions, kernel/params.c uses them to strictly
validate input, so module params will reject such values as 0x1234xxxx and
returns an error:

write error: Invalid argument

Any modules which export numeric sysfs node can use strict_strtox instead of
simple_strtox to reject any invalid input.

Here are some test results:

Before applying this patch:

[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 0x1000 > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 0x1000g > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 0x1000gggggggg > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 010000 > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 0100008 > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 010000aaaaa > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]#

After applying this patch:

[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 0x1000 > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 0x1000g > /sys/module/e1000/parameters/copybreak
-bash: echo: write error: Invalid argument
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo 0x1000gggggggg > /sys/module/e1000/parameters/copybreak
-bash: echo: write error: Invalid argument
[root@yangyi-dev /]# echo 010000 > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# echo 0100008 > /sys/module/e1000/parameters/copybreak
-bash: echo: write error: Invalid argument
[root@yangyi-dev /]# echo 010000aaaaa > /sys/module/e1000/parameters/copybreak
-bash: echo: write error: Invalid argument
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]# echo -n 4096 > /sys/module/e1000/parameters/copybreak
[root@yangyi-dev /]# cat /sys/module/e1000/parameters/copybreak
4096
[root@yangyi-dev /]#

[akpm@linux-foundation.org: fix compiler warnings]
[akpm@linux-foundation.org: fix off-by-one found by tiwai@suse.de]
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Cc: Greg KH <greg@kroah.com>
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h |   4 ++
 kernel/params.c        |  20 ++++----
 lib/vsprintf.c         | 123 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8f28d35867f8..2df44e773270 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -141,6 +141,10 @@ extern unsigned long simple_strtoul(const char *,char **,unsigned int);
 extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
+extern int strict_strtoul(const char *, unsigned int, unsigned long *);
+extern int strict_strtol(const char *, unsigned int, long *);
+extern int strict_strtoull(const char *, unsigned int, unsigned long long *);
+extern int strict_strtoll(const char *, unsigned int, long long *);
 extern int sprintf(char * buf, const char * fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 extern int vsprintf(char *buf, const char *, va_list)
diff --git a/kernel/params.c b/kernel/params.c
index e28c70628bb7..afc46a23eb6d 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -180,12 +180,12 @@ int parse_args(const char *name,
 #define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)      	\
 	int param_set_##name(const char *val, struct kernel_param *kp)	\
 	{								\
-		char *endp;						\
 		tmptype l;						\
+		int ret;						\
 									\
 		if (!val) return -EINVAL;				\
-		l = strtolfn(val, &endp, 0);				\
-		if (endp == val || ((type)l != l))			\
+		ret = strtolfn(val, 0, &l);				\
+		if (ret == -EINVAL || ((type)l != l))			\
 			return -EINVAL;					\
 		*((type *)kp->arg) = l;					\
 		return 0;						\
@@ -195,13 +195,13 @@ int parse_args(const char *name,
 		return sprintf(buffer, format, *((type *)kp->arg));	\
 	}
 
-STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, simple_strtoul);
-STANDARD_PARAM_DEF(short, short, "%hi", long, simple_strtol);
-STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, simple_strtoul);
-STANDARD_PARAM_DEF(int, int, "%i", long, simple_strtol);
-STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, simple_strtoul);
-STANDARD_PARAM_DEF(long, long, "%li", long, simple_strtol);
-STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, simple_strtoul);
+STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul);
+STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol);
+STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul);
+STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol);
+STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul);
+STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol);
+STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul);
 
 int param_set_charp(const char *val, struct kernel_param *kp)
 {
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7b481cea54ae..419993f58c6b 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -126,6 +126,129 @@ long long simple_strtoll(const char *cp,char **endp,unsigned int base)
 	return simple_strtoull(cp,endp,base);
 }
 
+
+/**
+ * strict_strtoul - convert a string to an unsigned long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtoul converts a string to an unsigned long only if the
+ * string is really an unsigned long string, any string containing
+ * any invalid char at the tail will be rejected and -EINVAL is returned,
+ * only a newline char at the tail is acceptible because people generally
+ * change a module parameter in the following way:
+ *
+ * 	echo 1024 > /sys/module/e1000/parameters/copybreak
+ *
+ * echo will append a newline to the tail.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ *
+ * simple_strtoul just ignores the successive invalid characters and
+ * return the converted value of prefix part of the string.
+ */
+int strict_strtoul(const char *cp, unsigned int base, unsigned long *res);
+
+/**
+ * strict_strtol - convert a string to a long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtol is similiar to strict_strtoul, but it allows the first
+ * character of a string is '-'.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ */
+int strict_strtol(const char *cp, unsigned int base, long *res);
+
+/**
+ * strict_strtoull - convert a string to an unsigned long long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtoull converts a string to an unsigned long long only if the
+ * string is really an unsigned long long string, any string containing
+ * any invalid char at the tail will be rejected and -EINVAL is returned,
+ * only a newline char at the tail is acceptible because people generally
+ * change a module parameter in the following way:
+ *
+ * 	echo 1024 > /sys/module/e1000/parameters/copybreak
+ *
+ * echo will append a newline to the tail of the string.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ *
+ * simple_strtoull just ignores the successive invalid characters and
+ * return the converted value of prefix part of the string.
+ */
+int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res);
+
+/**
+ * strict_strtoll - convert a string to a long long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtoll is similiar to strict_strtoull, but it allows the first
+ * character of a string is '-'.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ */
+int strict_strtoll(const char *cp, unsigned int base, long long *res);
+
+#define define_strict_strtoux(type, valtype)				\
+int strict_strtou##type(const char *cp, unsigned int base, valtype *res)\
+{									\
+	char *tail;							\
+	valtype val;							\
+	size_t len;							\
+									\
+	*res = 0;							\
+	len = strlen(cp);						\
+	if (len == 0)							\
+		return -EINVAL;						\
+									\
+	val = simple_strtoul(cp, &tail, base);				\
+	if ((*tail == '\0') ||						\
+		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {\
+		*res = val;						\
+		return 0;						\
+	}								\
+									\
+	return -EINVAL;							\
+}									\
+
+#define define_strict_strtox(type, valtype)				\
+int strict_strto##type(const char *cp, unsigned int base, valtype *res)	\
+{									\
+	int ret;							\
+	if (*cp == '-') {						\
+		ret = strict_strtou##type(cp+1, base, res);		\
+		if (ret != 0)						\
+			*res = -(*res);					\
+	} else								\
+		ret = strict_strtou##type(cp, base, res);		\
+									\
+	return ret;							\
+}									\
+
+define_strict_strtoux(l, unsigned long)
+define_strict_strtox(l, long)
+define_strict_strtoux(ll, unsigned long long)
+define_strict_strtox(ll, long long)
+
+EXPORT_SYMBOL(strict_strtoul);
+EXPORT_SYMBOL(strict_strtol);
+EXPORT_SYMBOL(strict_strtoll);
+EXPORT_SYMBOL(strict_strtoull);
+
 static int skip_atoi(const char **s)
 {
 	int i=0;
-- 
cgit 


From b55ab616fa4b00bdd5c470c70fdf87bab85eec68 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 8 Feb 2008 04:21:59 -0800
Subject: preemptible RCU: sparse annotations

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rcupreempt.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index ece8eb3e4151..60c2a033b19e 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -46,8 +46,8 @@
 #define rcu_bh_qsctr_inc(cpu)
 #define call_rcu_bh(head, rcu) call_rcu(head, rcu)
 
-extern void __rcu_read_lock(void);
-extern void __rcu_read_unlock(void);
+extern void __rcu_read_lock(void)	__acquires(RCU);
+extern void __rcu_read_unlock(void)	__releases(RCU);
 extern int rcu_pending(int cpu);
 extern int rcu_needs_cpu(int cpu);
 
-- 
cgit 


From 922f9cfa79b52c85b6002d96cb0eefd13437c58c Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Fri, 8 Feb 2008 04:22:00 -0800
Subject: fs/char_dev.c: chrdev_open marked static and removed from fs.h

There is an outdated comment in serial_core.c also fixed.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/serial_core.c | 4 ++--
 fs/char_dev.c                | 2 +-
 include/linux/fs.h           | 1 -
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index efef494d945a..0f5a17987cca 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -1553,8 +1553,8 @@ static struct uart_state *uart_get(struct uart_driver *drv, int line)
 }
 
 /*
- * In 2.4.5, calls to uart_open are serialised by the BKL in
- *   linux/fs/devices.c:chrdev_open()
+ * calls to uart_open are serialised by the BKL in
+ *   fs/char_dev.c:chrdev_open()
  * Note that if this fails, then uart_close() _will_ be called.
  *
  * In time, we want to scrap the "opening nonpresent ports"
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 2c7a8b5b4598..038674aa88a7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -357,7 +357,7 @@ void cdev_put(struct cdev *p)
 /*
  * Called every time a character special file is opened
  */
-int chrdev_open(struct inode * inode, struct file * filp)
+static int chrdev_open(struct inode *inode, struct file *filp)
 {
 	struct cdev *p;
 	struct cdev *new = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cb3a9001f3b9..18cfbf76ec5b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1624,7 +1624,6 @@ extern int register_chrdev(unsigned int, const char *,
 			   const struct file_operations *);
 extern void unregister_chrdev(unsigned int, const char *);
 extern void unregister_chrdev_region(dev_t, unsigned);
-extern int chrdev_open(struct inode *, struct file *);
 extern void chrdev_show(struct seq_file *,off_t);
 
 /* fs/block_dev.c */
-- 
cgit 


From 46f4f8f665080900e865392f4b3593be463bf0d8 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 8 Feb 2008 04:22:01 -0800
Subject: IRQ_NOPROBE helper functions

Probing non-ISA interrupts using the handle_percpu_irq as their handle_irq
method may crash the system because handle_percpu_irq does not check
IRQ_WAITING.  This for example hits the MIPS Qemu configuration.

This patch provides two helper functions set_irq_noprobe and set_irq_probe to
set rsp.  clear the IRQ_NOPROBE flag.  The only current caller is MIPS code
but this really belongs into generic code.

As an aside, interrupt probing these days has become a mostly obsolete if not
dangerous art.  I think Linux interrupts should be changed to default to
non-probing but that's subject of this patch.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Acked-and-tested-by: Rob Landley <rob@landley.net>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/irq.h |  3 +++
 kernel/irq/chip.c   | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index a19b381d4112..bfd9efb5cb49 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -367,6 +367,9 @@ set_irq_chained_handler(unsigned int irq,
 	__set_irq_handler(irq, handle, 1, NULL);
 }
 
+extern void set_irq_noprobe(unsigned int irq);
+extern void set_irq_probe(unsigned int irq);
+
 /* Handle dynamic irq creation and destruction */
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 10e006643c8c..cc54c6276356 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -589,3 +589,39 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
 	set_irq_chip(irq, chip);
 	__set_irq_handler(irq, handle, 0, name);
 }
+
+void __init set_irq_noprobe(unsigned int irq)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
+
+		return;
+	}
+
+	desc = irq_desc + irq;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	desc->status |= IRQ_NOPROBE;
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+void __init set_irq_probe(unsigned int irq)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
+
+		return;
+	}
+
+	desc = irq_desc + irq;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	desc->status &= ~IRQ_NOPROBE;
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
-- 
cgit 


From 2f569afd9ced9ebec9a6eb3dbf6f83429be0a7b4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 8 Feb 2008 04:22:04 -0800
Subject: CONFIG_HIGHPTE vs. sub-page page tables.

Background: I've implemented 1K/2K page tables for s390.  These sub-page
page tables are required to properly support the s390 virtualization
instruction with KVM.  The SIE instruction requires that the page tables
have 256 page table entries (pte) followed by 256 page status table entries
(pgste).  The pgstes are only required if the process is using the SIE
instruction.  The pgstes are updated by the hardware and by the hypervisor
for a number of reasons, one of them is dirty and reference bit tracking.
To avoid wasting memory the standard pte table allocation should return
1K/2K (31/64 bit) and 2K/4K if the process is using SIE.

Problem: Page size on s390 is 4K, page table size is 1K or 2K.  That means
the s390 version for pte_alloc_one cannot return a pointer to a struct
page.  Trouble is that with the CONFIG_HIGHPTE feature on x86 pte_alloc_one
cannot return a pointer to a pte either, since that would require more than
32 bit for the return value of pte_alloc_one (and the pte * would not be
accessible since its not kmapped).

Solution: The only solution I found to this dilemma is a new typedef: a
pgtable_t.  For s390 pgtable_t will be a (pte *) - to be introduced with a
later patch.  For everybody else it will be a (struct page *).  The
additional problem with the initialization of the ptl lock and the
NR_PAGETABLE accounting is solved with a constructor pgtable_page_ctor and
a destructor pgtable_page_dtor.  The page table allocation and free
functions need to call these two whenever a page table page is allocated or
freed.  pmd_populate will get a pgtable_t instead of a struct page pointer.
 To get the pgtable_t back from a pmd entry that has been installed with
pmd_populate a new function pmd_pgtable is added.  It replaces the pmd_page
call in free_pte_range and apply_to_pte_range.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/mm/pgalloc.c               |  8 +++++---
 arch/powerpc/mm/pgtable_32.c        | 14 ++++++++------
 arch/ppc/mm/pgtable.c               |  9 ++++++---
 arch/s390/mm/pgtable.c              |  2 ++
 arch/sparc/mm/srmmu.c               | 10 +++++++---
 arch/sparc/mm/sun4c.c               | 14 ++++++++++----
 arch/um/kernel/mem.c                |  4 +++-
 arch/x86/mm/pgtable_32.c            |  5 ++++-
 include/asm-alpha/page.h            |  2 ++
 include/asm-alpha/pgalloc.h         | 22 ++++++++++++++--------
 include/asm-arm/page.h              |  2 ++
 include/asm-arm/pgalloc.h           |  9 ++++++---
 include/asm-avr32/page.h            |  1 +
 include/asm-avr32/pgalloc.h         | 16 ++++++++++++----
 include/asm-cris/page.h             |  1 +
 include/asm-cris/pgalloc.h          | 14 ++++++++++----
 include/asm-frv/page.h              |  1 +
 include/asm-frv/pgalloc.h           | 12 +++++++++---
 include/asm-ia64/page.h             |  2 ++
 include/asm-ia64/pgalloc.h          | 20 ++++++++++++++------
 include/asm-m32r/page.h             |  1 +
 include/asm-m32r/pgalloc.h          | 10 ++++++----
 include/asm-m68k/motorola_pgalloc.h | 14 ++++++++------
 include/asm-m68k/page.h             |  1 +
 include/asm-m68k/sun3_pgalloc.h     | 17 ++++++++++++-----
 include/asm-mips/page.h             |  1 +
 include/asm-mips/pgalloc.h          | 17 ++++++++++++-----
 include/asm-parisc/page.h           |  1 +
 include/asm-parisc/pgalloc.h        | 11 +++++++++--
 include/asm-powerpc/page.h          |  2 ++
 include/asm-powerpc/pgalloc-32.h    |  6 ++++--
 include/asm-powerpc/pgalloc-64.h    | 27 ++++++++++++++++++++-------
 include/asm-ppc/pgalloc.h           |  6 ++++--
 include/asm-s390/page.h             |  2 ++
 include/asm-s390/pgalloc.h          |  3 ++-
 include/asm-s390/tlb.h              |  2 +-
 include/asm-sh/page.h               |  2 ++
 include/asm-sh/pgalloc.h            | 27 ++++++++++++++++++++-------
 include/asm-sparc/page.h            |  2 ++
 include/asm-sparc/pgalloc.h         |  5 +++--
 include/asm-sparc64/page.h          |  2 ++
 include/asm-sparc64/pgalloc.h       | 19 ++++++++++++++-----
 include/asm-um/page.h               |  2 ++
 include/asm-um/pgalloc.h            | 12 +++++++++---
 include/asm-x86/page_32.h           |  2 ++
 include/asm-x86/page_64.h           |  2 ++
 include/asm-x86/pgalloc_32.h        |  6 ++++--
 include/asm-x86/pgalloc_64.h        | 22 +++++++++++++++++-----
 include/asm-xtensa/page.h           |  1 +
 include/asm-xtensa/pgalloc.h        | 17 ++++++++++++-----
 include/linux/mm.h                  | 14 +++++++++++++-
 mm/memory.c                         | 32 +++++++++++++++-----------------
 mm/vmalloc.c                        |  2 +-
 53 files changed, 326 insertions(+), 132 deletions(-)

(limited to 'include/linux')

diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 1a2e5c8d03a9..66f616fb4860 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -28,7 +28,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *page;
 
@@ -37,9 +37,11 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 #else
 	page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
 #endif
-	if (page)
+	if (page) {
 		clear_highpage(page);
-	flush_dcache_page(page);
+		pgtable_page_ctor(page);
+		flush_dcache_page(page);
+	}
 	return page;
 }
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index f80f90c4d58b..ac3390f81900 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -107,19 +107,20 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-	gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO;
 #else
-	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
 #endif
 
 	ptepage = alloc_pages(flags, 0);
-	if (ptepage)
-		clear_highpage(ptepage);
+	if (!ptepage)
+		return NULL;
+	pgtable_page_ctor(ptepage);
 	return ptepage;
 }
 
@@ -131,11 +132,12 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
+	pgtable_page_dtor(ptepage);
 	__free_page(ptepage);
 }
 
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 409fcaa4994a..03a79bff1271 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -95,7 +95,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *ptepage;
 
@@ -106,8 +106,10 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 #endif
 
 	ptepage = alloc_pages(flags, 0);
-	if (ptepage)
+	if (ptepage) {
 		clear_highpage(ptepage);
+		pgtable_page_ctor(ptepage);
+	}
 	return ptepage;
 }
 
@@ -119,11 +121,12 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
+	pgtable_page_dtor(ptepage);
 	__free_page(ptepage);
 }
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index e60e0ae13402..019f518cd5a0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -78,6 +78,7 @@ unsigned long *page_table_alloc(int noexec)
 		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		page->index = (addr_t) table;
 	}
+	pgtable_page_ctor(page);
 	table = (unsigned long *) page_to_phys(page);
 	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 	return table;
@@ -87,6 +88,7 @@ void page_table_free(unsigned long *table)
 {
 	unsigned long *shadow = get_shadow_pte(table);
 
+	pgtable_page_dtor(virt_to_page(table));
 	if (shadow)
 		free_page((unsigned long) shadow);
 	free_page((unsigned long) table);
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index dc98e3844a0a..23d3291a3e81 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -489,14 +489,17 @@ srmmu_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return (pte_t *)srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
 
-static struct page *
+static pgtable_t
 srmmu_pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	unsigned long pte;
+	struct page *page;
 
 	if ((pte = (unsigned long)srmmu_pte_alloc_one_kernel(mm, address)) == 0)
 		return NULL;
-	return pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+	page = pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static void srmmu_free_pte_fast(pte_t *pte)
@@ -504,10 +507,11 @@ static void srmmu_free_pte_fast(pte_t *pte)
 	srmmu_free_nocache((unsigned long)pte, PTE_SIZE);
 }
 
-static void srmmu_pte_free(struct page *pte)
+static void srmmu_pte_free(pgtable_t pte)
 {
 	unsigned long p;
 
+	pgtable_page_dtor(pte);
 	p = (unsigned long)page_address(pte);	/* Cached address (for test) */
 	if (p == 0)
 		BUG();
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 0729305f2f59..c0442e8c4b15 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1947,12 +1947,17 @@ static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	return pte;
 }
 
-static struct page *sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static pgtable_t sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = sun4c_pte_alloc_one_kernel(mm, address);
+	pte_t *pte;
+	struct page *page;
+
+	pte = sun4c_pte_alloc_one_kernel(mm, address);
 	if (pte == NULL)
 		return NULL;
-	return virt_to_page(pte);
+	page = virt_to_page(pte);
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static inline void sun4c_free_pte_fast(pte_t *pte)
@@ -1962,8 +1967,9 @@ static inline void sun4c_free_pte_fast(pte_t *pte)
 	pgtable_cache_size++;
 }
 
-static void sun4c_pte_free(struct page *pte)
+static void sun4c_pte_free(pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	sun4c_free_pte_fast(page_address(pte));
 }
 
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index d872fdce1d7e..2627ce82e918 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -354,11 +354,13 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
 	pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	if (pte)
+		pgtable_page_ctor(pte);
 	return pte;
 }
 
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6c1914622a88..73aba7125203 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -183,7 +183,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
@@ -192,6 +192,8 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 #else
 	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
 #endif
+	if (pte)
+		pgtable_page_ctor(pte);
 	return pte;
 }
 
@@ -365,6 +367,7 @@ void check_pgt_cache(void)
 
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
+	pgtable_page_dtor(pte);
 	paravirt_release_pt(page_to_pfn(pte));
 	tlb_remove_page(tlb, pte);
 }
diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h
index 05f09f997d82..22ff9762d17b 100644
--- a/include/asm-alpha/page.h
+++ b/include/asm-alpha/page.h
@@ -62,6 +62,8 @@ typedef unsigned long pgprot_t;
 
 #endif /* STRICT_MM_TYPECHECKS */
 
+typedef struct page *pgtable_t;
+
 #ifdef USE_48_BIT_KSEG
 #define PAGE_OFFSET		0xffff800000000000UL
 #else
diff --git a/include/asm-alpha/pgalloc.h b/include/asm-alpha/pgalloc.h
index fdbedacc7375..fd090155dccd 100644
--- a/include/asm-alpha/pgalloc.h
+++ b/include/asm-alpha/pgalloc.h
@@ -11,10 +11,11 @@
  */
 
 static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
 {
 	pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
@@ -57,18 +58,23 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t
+pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, addr);
-	if (pte)
-		return virt_to_page(pte);
-	return NULL;
+	pte_t *pte = pte_alloc_one_kernel(mm, address);
+	struct page *page;
+
+	if (!pte)
+		return NULL;
+	page = virt_to_page(pte);
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static inline void
-pte_free(struct mm_struct *mm, struct page *page)
+pte_free(struct mm_struct *mm, pgtable_t page)
 {
+	pgtable_page_dtor(page);
 	__free_page(page);
 }
 
diff --git a/include/asm-arm/page.h b/include/asm-arm/page.h
index 31ff12f4ffb7..c86f68ee6511 100644
--- a/include/asm-arm/page.h
+++ b/include/asm-arm/page.h
@@ -171,6 +171,8 @@ typedef unsigned long pgprot_t;
 
 #endif /* STRICT_MM_TYPECHECKS */
 
+typedef struct page *pgtable_t;
+
 #endif /* CONFIG_MMU */
 
 #include <asm/memory.h>
diff --git a/include/asm-arm/pgalloc.h b/include/asm-arm/pgalloc.h
index fb6c6e3222bd..163b0305dd76 100644
--- a/include/asm-arm/pgalloc.h
+++ b/include/asm-arm/pgalloc.h
@@ -66,7 +66,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
-static inline struct page *
+static inline pgtable_t
 pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	struct page *pte;
@@ -75,6 +75,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 	if (pte) {
 		void *page = page_address(pte);
 		clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE);
+		pgtable_page_ctor(pte);
 	}
 
 	return pte;
@@ -91,8 +92,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	}
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 }
 
@@ -123,10 +125,11 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
 }
 
 static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
+pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
 {
 	__pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE);
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 #endif /* CONFIG_MMU */
 
diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h
index ee23499cec34..5582968feee8 100644
--- a/include/asm-avr32/page.h
+++ b/include/asm-avr32/page.h
@@ -34,6 +34,7 @@ extern void copy_page(void *to, void *from);
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
 
 #define pte_val(x)		((x).pte)
 #define pgd_val(x)		((x).pgd)
diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h
index b77e364b4c44..51fc1f6e4b17 100644
--- a/include/asm-avr32/pgalloc.h
+++ b/include/asm-avr32/pgalloc.h
@@ -17,10 +17,11 @@
 	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
 
 static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
-				    struct page *pte)
+				    pgtable_t pte)
 {
 	set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Allocate and free page tables
@@ -51,7 +52,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	struct page *pte;
 
 	pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
-
+	if (!pte)
+		return NULL;
+	pgtable_page_ctor(pte);
 	return pte;
 }
 
@@ -60,12 +63,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor(pte);				\
+	tlb_remove_page((tlb), pte);			\
+} while (0)
 
 #define check_pgt_cache() do { } while(0)
 
diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h
index 3b0156c46311..c45bb1ef397c 100644
--- a/include/asm-cris/page.h
+++ b/include/asm-cris/page.h
@@ -26,6 +26,7 @@
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
 #endif
 
 #define pte_val(x)	((x).pte)
diff --git a/include/asm-cris/pgalloc.h b/include/asm-cris/pgalloc.h
index 8ddd66f81773..a1ba761d0573 100644
--- a/include/asm-cris/pgalloc.h
+++ b/include/asm-cris/pgalloc.h
@@ -6,6 +6,7 @@
 
 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
 #define pmd_populate(mm, pmd, pte) pmd_set(pmd, page_address(pte))
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Allocate and free page tables.
@@ -27,10 +28,11 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad
  	return pte;
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pgtable_page_ctor(pte);
 	return pte;
 }
 
@@ -39,13 +41,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 }
 
-
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor(pte);				\
+	tlb_remove_page((tlb), pte);			\
+} while (0)
 
 #define check_pgt_cache()          do { } while (0)
 
diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h
index cacc045700de..c2c1e89e747d 100644
--- a/include/asm-frv/page.h
+++ b/include/asm-frv/page.h
@@ -25,6 +25,7 @@ typedef struct { unsigned long	ste[64];} pmd_t;
 typedef struct { pmd_t		pue[1]; } pud_t;
 typedef struct { pud_t		pge[1];	} pgd_t;
 typedef struct { unsigned long	pgprot;	} pgprot_t;
+typedef struct page *pgtable_t;
 
 #define pte_val(x)	((x).pte)
 #define pmd_val(x)	((x).ste[0])
diff --git a/include/asm-frv/pgalloc.h b/include/asm-frv/pgalloc.h
index e89620ef08ca..971e6addb009 100644
--- a/include/asm-frv/pgalloc.h
+++ b/include/asm-frv/pgalloc.h
@@ -25,6 +25,7 @@
 do {										\
 	__set_pmd((PMD), page_to_pfn(PAGE) << PAGE_SHIFT | _PAGE_TABLE);	\
 } while(0)
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Allocate and free page tables.
@@ -35,19 +36,24 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *);
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
 
-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb,pte)		tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor(pte);				\
+	tlb_remove_page((tlb),(pte));			\
+} while (0)
 
 /*
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h
index 8a8aa3fd7cd4..4999a6c63775 100644
--- a/include/asm-ia64/page.h
+++ b/include/asm-ia64/page.h
@@ -185,6 +185,7 @@ get_order (unsigned long size)
 #endif
   typedef struct { unsigned long pgd; } pgd_t;
   typedef struct { unsigned long pgprot; } pgprot_t;
+  typedef struct page *pgtable_t;
 
 # define pte_val(x)	((x).pte)
 # define pmd_val(x)	((x).pmd)
@@ -206,6 +207,7 @@ get_order (unsigned long size)
     typedef unsigned long pmd_t;
     typedef unsigned long pgd_t;
     typedef unsigned long pgprot_t;
+    typedef struct page *pgtable_t;
 # endif
 
 # define pte_val(x)	(x)
diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
index 556d988123ac..b9ac1a6fc216 100644
--- a/include/asm-ia64/pgalloc.h
+++ b/include/asm-ia64/pgalloc.h
@@ -70,10 +70,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 #define __pmd_free_tlb(tlb, pmd)	pmd_free((tlb)->mm, pmd)
 
 static inline void
-pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte)
 {
 	pmd_val(*pmd_entry) = page_to_phys(pte);
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
@@ -81,11 +82,17 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
-	return pg ? virt_to_page(pg) : NULL;
+	struct page *page;
+	void *pg;
+
+	pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+	if (!pg)
+		return NULL;
+	page = virt_to_page(pg);
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -94,8 +101,9 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	quicklist_free_page(0, NULL, pte);
 }
 
diff --git a/include/asm-m32r/page.h b/include/asm-m32r/page.h
index 05d43bbbf940..8a677f3fca68 100644
--- a/include/asm-m32r/page.h
+++ b/include/asm-m32r/page.h
@@ -28,6 +28,7 @@ typedef struct { unsigned long pgd; } pgd_t;
 #define PTE_MASK	PAGE_MASK
 
 typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
 
 #define pmd_val(x)	((x).pmd)
 #define pgd_val(x)	((x).pgd)
diff --git a/include/asm-m32r/pgalloc.h b/include/asm-m32r/pgalloc.h
index e5921adfad1b..f11a2b909cdb 100644
--- a/include/asm-m32r/pgalloc.h
+++ b/include/asm-m32r/pgalloc.h
@@ -9,10 +9,11 @@
 	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
 
 static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
-	struct page *pte)
+	pgtable_t pte)
 {
 	set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Allocate and free page tables.
@@ -37,12 +38,12 @@ static __inline__ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return pte;
 }
 
-static __inline__ struct page *pte_alloc_one(struct mm_struct *mm,
+static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm,
 	unsigned long address)
 {
 	struct page *pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
 
-
+	pgtable_page_ctor(pte);
 	return pte;
 }
 
@@ -51,8 +52,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 }
 
diff --git a/include/asm-m68k/motorola_pgalloc.h b/include/asm-m68k/motorola_pgalloc.h
index 500ec9b8b189..d08bf6261df8 100644
--- a/include/asm-m68k/motorola_pgalloc.h
+++ b/include/asm-m68k/motorola_pgalloc.h
@@ -7,7 +7,6 @@
 extern pmd_t *get_pointer_table(void);
 extern int free_pointer_table(pmd_t *);
 
-
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	pte_t *pte;
@@ -28,7 +27,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long) pte);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
 	pte_t *pte;
@@ -43,19 +42,21 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long add
 		nocache_page(pte);
 	}
 	kunmap(pte);
-
+	pgtable_page_ctor(page);
 	return page;
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 {
+	pgtable_page_dtor(page);
 	cache_page(kmap(page));
 	kunmap(page);
 	__free_page(page);
 }
 
-static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page)
 {
+	pgtable_page_dtor(page);
 	cache_page(kmap(page));
 	kunmap(page);
 	__free_page(page);
@@ -94,10 +95,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *
 	pmd_set(pmd, pte);
 }
 
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
 {
 	pmd_set(pmd, page_address(page));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
 {
diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h
index 3f29e2a03a43..880c2cbff8a6 100644
--- a/include/asm-m68k/page.h
+++ b/include/asm-m68k/page.h
@@ -91,6 +91,7 @@ typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd[16]; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
 
 #define pte_val(x)	((x).pte)
 #define pmd_val(x)	((&x)->pmd[0])
diff --git a/include/asm-m68k/sun3_pgalloc.h b/include/asm-m68k/sun3_pgalloc.h
index a5a91e72714b..d4c83f143816 100644
--- a/include/asm-m68k/sun3_pgalloc.h
+++ b/include/asm-m68k/sun3_pgalloc.h
@@ -26,12 +26,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
         free_page((unsigned long) pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 {
+	pgtable_page_dtor(page);
         __free_page(page);
 }
 
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor(pte);				\
+	tlb_remove_page((tlb), pte);			\
+} while (0)
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
@@ -45,8 +50,8 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return (pte_t *) (page);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+					unsigned long address)
 {
         struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
 
@@ -54,6 +59,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 		return NULL;
 
 	clear_highpage(page);
+	pgtable_page_ctor(page);
 	return page;
 
 }
@@ -63,10 +69,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *
 	pmd_val(*pmd) = __pa((unsigned long)pte);
 }
 
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
 {
 	pmd_val(*pmd) = __pa((unsigned long)page_address(page));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h
index 635aa44d2290..8735aa0b8963 100644
--- a/include/asm-mips/page.h
+++ b/include/asm-mips/page.h
@@ -90,6 +90,7 @@ typedef struct { unsigned long pte; } pte_t;
 #define pte_val(x)	((x).pte)
 #define __pte(x)	((pte_t) { (x) } )
 #endif
+typedef struct page *pgtable_t;
 
 /*
  * For 3-level pagetables we defines these ourselves, for 2-level the
diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h
index c4efeced8396..1275831dda29 100644
--- a/include/asm-mips/pgalloc.h
+++ b/include/asm-mips/pgalloc.h
@@ -20,10 +20,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
-	struct page *pte)
+	pgtable_t pte)
 {
 	set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Initialize a new pmd table with invalid pointers.
@@ -79,9 +80,10 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	struct page *pte;
 
 	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
-	if (pte)
+	if (pte) {
 		clear_highpage(pte);
-
+		pgtable_page_ctor(pte);
+	}
 	return pte;
 }
 
@@ -90,12 +92,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_pages((unsigned long)pte, PTE_ORDER);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_pages(pte, PTE_ORDER);
 }
 
-#define __pte_free_tlb(tlb, pte)	tlb_remove_page((tlb), (pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor(pte);				\
+	tlb_remove_page((tlb), pte);			\
+} while (0)
 
 #ifdef CONFIG_32BIT
 
diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h
index b08d9151c71e..27d50b859541 100644
--- a/include/asm-parisc/page.h
+++ b/include/asm-parisc/page.h
@@ -91,6 +91,7 @@ typedef unsigned long pgprot_t;
 
 #endif /* STRICT_MM_TYPECHECKS */
 
+typedef struct page *pgtable_t;
 
 typedef struct __physmem_range {
 	unsigned long start_pfn;
diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h
index aab66f1bea14..3996dfc30a3f 100644
--- a/include/asm-parisc/pgalloc.h
+++ b/include/asm-parisc/pgalloc.h
@@ -115,11 +115,14 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
-static inline struct page *
+static inline pgtable_t
 pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	if (page)
+		pgtable_page_ctor(page);
 	return page;
 }
 
@@ -135,7 +138,11 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-#define pte_free(mm, page) pte_free_kernel(page_address(page))
+static inline void pte_free_kernel(struct mm_struct *mm, struct page *pte)
+{
+	pgtable_page_dtor(pte);
+	pte_free_kernel(page_address((pte));
+}
 
 #define check_pgt_cache()	do { } while (0)
 
diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
index 61e3725bbd37..df47bbb6ea13 100644
--- a/include/asm-powerpc/page.h
+++ b/include/asm-powerpc/page.h
@@ -190,6 +190,8 @@ extern int page_is_ram(unsigned long pfn);
 
 struct vm_area_struct;
 
+typedef struct page *pgtable_t;
+
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-powerpc/pgalloc-32.h b/include/asm-powerpc/pgalloc-32.h
index c162a4c37b39..58c07147b3ea 100644
--- a/include/asm-powerpc/pgalloc-32.h
+++ b/include/asm-powerpc/pgalloc-32.h
@@ -22,17 +22,19 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 		(pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
 #define pmd_populate(mm, pmd, pte)	\
 		(pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
 #else
 #define pmd_populate_kernel(mm, pmd, pte)	\
 		(pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
 #define pmd_populate(mm, pmd, pte)	\
 		(pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
 extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t pte);
 
 #define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
 
diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h
index 5afae8593931..68980990f62a 100644
--- a/include/asm-powerpc/pgalloc-64.h
+++ b/include/asm-powerpc/pgalloc-64.h
@@ -58,6 +58,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 
 #else /* CONFIG_PPC_64K_PAGES */
@@ -72,6 +73,7 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 #endif /* CONFIG_PPC_64K_PAGES */
 
@@ -92,11 +94,18 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
         return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+					unsigned long address)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, address);
-	return pte ? virt_to_page(pte) : NULL;
+	struct page *page;
+	pte_t *pte;
+
+	pte = pte_alloc_one_kernel(mm, address);
+	if (!pte)
+		return NULL;
+	page = virt_to_page(pte);
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -104,8 +113,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
+	pgtable_page_dtor(ptepage);
 	__free_page(ptepage);
 }
 
@@ -136,9 +146,12 @@ static inline void pgtable_free(pgtable_free_t pgf)
 
 extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
 
-#define __pte_free_tlb(tlb, ptepage)	\
+#define __pte_free_tlb(tlb,ptepage)	\
+do { \
+	pgtable_page_dtor(ptepage); \
 	pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
-		PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1))
+		PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
+} while (0)
 #define __pmd_free_tlb(tlb, pmd) 	\
 	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
 		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
diff --git a/include/asm-ppc/pgalloc.h b/include/asm-ppc/pgalloc.h
index 7c39a95829c7..fd4d1d74cfb1 100644
--- a/include/asm-ppc/pgalloc.h
+++ b/include/asm-ppc/pgalloc.h
@@ -23,17 +23,19 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 		(pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
 #define pmd_populate(mm, pmd, pte)	\
 		(pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
 #else
 #define pmd_populate_kernel(mm, pmd, pte)	\
 		(pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
 #define pmd_populate(mm, pmd, pte)	\
 		(pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
 extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t pte);
 
 #define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
 
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index a55f9d979dfb..7f29a981f48c 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -109,6 +109,8 @@ typedef struct { unsigned long pgd; } pgd_t;
 
 #endif /* __s390x__ */
 
+typedef struct page *pgtable_t;
+
 #define __pte(x)        ((pte_t) { (x) } )
 #define __pmd(x)        ((pmd_t) { (x) } )
 #define __pgd(x)        ((pgd_t) { (x) } )
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 6f6619ba8980..900d44807e10 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -132,7 +132,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 }
 
 static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
 {
 	pte_t *pte = (pte_t *)page_to_phys(page);
 	pmd_t *shadow_pmd = get_shadow_table(pmd);
@@ -142,6 +142,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
 	if (shadow_pmd && shadow_pte)
 		pmd_populate_kernel(mm, shadow_pmd, shadow_pte);
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * page table entry allocation/free routines.
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h
index 985de2b88279..3c8177fa9e06 100644
--- a/include/asm-s390/tlb.h
+++ b/include/asm-s390/tlb.h
@@ -95,7 +95,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
  * pte_free_tlb frees a pte table and clears the CRSTE for the
  * page table from the tlb.
  */
-static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t page)
 {
 	if (!tlb->fullmm) {
 		tlb->array[tlb->nr_ptes++] = page;
diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index e0fe02950f52..134562dc8c45 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -100,6 +100,8 @@ typedef struct { unsigned long pgd; } pgd_t;
 #define __pgd(x) ((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
+typedef struct page *pgtable_t;
+
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/include/asm-sh/pgalloc.h b/include/asm-sh/pgalloc.h
index 59ca16d77a1d..84dd2db7104c 100644
--- a/include/asm-sh/pgalloc.h
+++ b/include/asm-sh/pgalloc.h
@@ -14,10 +14,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
-				struct page *pte)
+				pgtable_t pte)
 {
 	set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline void pgd_ctor(void *x)
 {
@@ -47,11 +48,18 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+					unsigned long address)
 {
-	void *pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
-	return pg ? virt_to_page(pg) : NULL;
+	struct page *page;
+	void *pg;
+
+	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	if (!pg)
+		return NULL;
+	page = virt_to_page(pg);
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -59,12 +67,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	quicklist_free(QUICK_PT, NULL, pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	quicklist_free_page(QUICK_PT, NULL, pte);
 }
 
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor(pte);				\
+	tlb_remove_page((tlb), (pte));			\
+} while (0)
 
 /*
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
diff --git a/include/asm-sparc/page.h b/include/asm-sparc/page.h
index cbc48c0c4e15..39ccf2da297c 100644
--- a/include/asm-sparc/page.h
+++ b/include/asm-sparc/page.h
@@ -123,6 +123,8 @@ typedef unsigned long iopgprot_t;
 
 #endif
 
+typedef struct page *pgtable_t;
+
 extern unsigned long sparc_unmapped_base;
 
 BTFIXUPDEF_SETHI(sparc_unmapped_base)
diff --git a/include/asm-sparc/pgalloc.h b/include/asm-sparc/pgalloc.h
index b5fbdd36447f..6292cd00e5af 100644
--- a/include/asm-sparc/pgalloc.h
+++ b/include/asm-sparc/pgalloc.h
@@ -50,10 +50,11 @@ BTFIXUPDEF_CALL(void, free_pmd_fast, pmd_t *)
 
 BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *)
 #define pmd_populate(MM, PMD, PTE)        BTFIXUP_CALL(pmd_populate)(PMD, PTE)
+#define pmd_pgtable(pmd) pmd_page(pmd)
 BTFIXUPDEF_CALL(void, pmd_set, pmd_t *, pte_t *)
 #define pmd_populate_kernel(MM, PMD, PTE) BTFIXUP_CALL(pmd_set)(PMD, PTE)
 
-BTFIXUPDEF_CALL(struct page *, pte_alloc_one, struct mm_struct *, unsigned long)
+BTFIXUPDEF_CALL(pgtable_t , pte_alloc_one, struct mm_struct *, unsigned long)
 #define pte_alloc_one(mm, address)	BTFIXUP_CALL(pte_alloc_one)(mm, address)
 BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_kernel, struct mm_struct *, unsigned long)
 #define pte_alloc_one_kernel(mm, addr)	BTFIXUP_CALL(pte_alloc_one_kernel)(mm, addr)
@@ -61,7 +62,7 @@ BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_kernel, struct mm_struct *, unsigned long
 BTFIXUPDEF_CALL(void, free_pte_fast, pte_t *)
 #define pte_free_kernel(mm, pte)	BTFIXUP_CALL(free_pte_fast)(pte)
 
-BTFIXUPDEF_CALL(void, pte_free, struct page *)
+BTFIXUPDEF_CALL(void, pte_free, pgtable_t )
 #define pte_free(mm, pte)	BTFIXUP_CALL(pte_free)(pte)
 #define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
 
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index cdf950e017ee..e93a482aa24a 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -104,6 +104,8 @@ typedef unsigned long pgprot_t;
 
 #endif /* (STRICT_MM_TYPECHECKS) */
 
+typedef struct page *pgtable_t;
+
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_32BIT) ? \
 				 (_AC(0x0000000070000000,UL)) : \
 				 (_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index b48f73c2274e..3ee2d406373b 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -43,11 +43,18 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm,
-					 unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+					unsigned long address)
 {
-	void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
-	return pg ? virt_to_page(pg) : NULL;
+	struct page *page;
+	void *pg;
+
+	pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+	if (!pg)
+		return NULL;
+	page = virt_to_page(pg);
+	pgtable_page_ctor(page);
+	return page;
 }
 		
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -55,8 +62,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	quicklist_free(0, NULL, pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
+	pgtable_page_dtor(ptepage);
 	quicklist_free_page(0, NULL, ptepage);
 }
 
@@ -64,6 +72,7 @@ static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
 #define pmd_populate_kernel(MM, PMD, PTE)	pmd_set(PMD, PTE)
 #define pmd_populate(MM,PMD,PTE_PAGE)		\
 	pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline void check_pgt_cache(void)
 {
diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index fe2374d705d1..381f96b1c825 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -79,6 +79,8 @@ typedef unsigned long phys_t;
 
 typedef struct { unsigned long pgprot; } pgprot_t;
 
+typedef struct page *pgtable_t;
+
 #define pgd_val(x)	((x).pgd)
 #define pgprot_val(x)	((x).pgprot)
 
diff --git a/include/asm-um/pgalloc.h b/include/asm-um/pgalloc.h
index 4f3e62b02861..9062a6e72241 100644
--- a/include/asm-um/pgalloc.h
+++ b/include/asm-um/pgalloc.h
@@ -18,6 +18,7 @@
 	set_pmd(pmd, __pmd(_PAGE_TABLE +			\
 		((unsigned long long)page_to_pfn(pte) <<	\
 			(unsigned long long) PAGE_SHIFT)))
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Allocate and free page tables.
@@ -26,19 +27,24 @@ extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long) pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor(pte);				\
+	tlb_remove_page((tlb),(pte));			\
+} while (0)
 
 #ifdef CONFIG_3_LEVEL_PGTABLES
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index a6fd10f230d2..ba715d9798b0 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -50,6 +50,8 @@ typedef unsigned long	phys_addr_t;
 typedef union { pteval_t pte, pte_low; } pte_t;
 typedef pte_t boot_pte_t;
 
+typedef struct page *pgtable_t;
+
 #endif	/* __ASSEMBLY__ */
 #endif	/* CONFIG_X86_PAE */
 
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index dcf0c0746075..f7393bc516ef 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -71,6 +71,8 @@ typedef unsigned long	pgdval_t;
 typedef unsigned long	pgprotval_t;
 typedef unsigned long	phys_addr_t;
 
+typedef struct page *pgtable_t;
+
 typedef struct { pteval_t pte; } pte_t;
 
 #define vmemmap ((struct page *)VMEMMAP_START)
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index bab12718a913..6bea6e5b5ee5 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -31,6 +31,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p
 	paravirt_alloc_pt(mm, pfn);
 	set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Allocate and free page tables.
@@ -39,15 +40,16 @@ extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 }
 
diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h
index 4f6220db22b1..8d6722320dcc 100644
--- a/include/asm-x86/pgalloc_64.h
+++ b/include/asm-x86/pgalloc_64.h
@@ -12,6 +12,8 @@
 #define pgd_populate(mm, pgd, pud) \
 		set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)))
 
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
 {
 	set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
@@ -91,12 +93,17 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad
 	return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	struct page *page;
+	void *p;
+
+	p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 	if (!p)
 		return NULL;
-	return virt_to_page(p);
+	page = virt_to_page(p);
+	pgtable_page_ctor(page);
+	return page;
 }
 
 /* Should really implement gc for free page table pages. This could be
@@ -108,12 +115,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte); 
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	__free_page(pte);
 } 
 
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte)				\
+do {							\
+	pgtable_page_dtor((pte));				\
+	tlb_remove_page((tlb), (pte));			\
+} while (0)
 
 #define __pmd_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
 #define __pud_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h
index 1adedbf41d01..80a6ae0dd259 100644
--- a/include/asm-xtensa/page.h
+++ b/include/asm-xtensa/page.h
@@ -98,6 +98,7 @@
 typedef struct { unsigned long pte; } pte_t;		/* page table entry */
 typedef struct { unsigned long pgd; } pgd_t;		/* PGD table entry */
 typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
 
 #define pte_val(x)	((x).pte)
 #define pgd_val(x)	((x).pgd)
diff --git a/include/asm-xtensa/pgalloc.h b/include/asm-xtensa/pgalloc.h
index 1d51ba5463f9..8d1544eb461e 100644
--- a/include/asm-xtensa/pgalloc.h
+++ b/include/asm-xtensa/pgalloc.h
@@ -24,6 +24,7 @@
 	(pmd_val(*(pmdp)) = ((unsigned long)ptep))
 #define pmd_populate(mm, pmdp, page)					     \
 	(pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page)))
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pgd_t*
 pgd_alloc(struct mm_struct *mm)
@@ -46,10 +47,14 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm, 
-					 unsigned long addr)
+static inline pte_token_t pte_alloc_one(struct mm_struct *mm,
+					unsigned long addr)
 {
-	return virt_to_page(pte_alloc_one_kernel(mm, addr));
+	struct page *page;
+
+	page = virt_to_page(pte_alloc_one_kernel(mm, addr));
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -57,10 +62,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	kmem_cache_free(pgtable_cache, pte);
 }
 
-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
-	kmem_cache_free(pgtable_cache, page_address(page));
+	pgtable_page_dtor(pte);
+	kmem_cache_free(pgtable_cache, page_address(pte));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 #endif /* __KERNEL__ */
 #endif /* _XTENSA_PGALLOC_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 89d7c691b93a..e8abb3814209 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -894,6 +894,18 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
 #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
 
+static inline void pgtable_page_ctor(struct page *page)
+{
+	pte_lock_init(page);
+	inc_zone_page_state(page, NR_PAGETABLE);
+}
+
+static inline void pgtable_page_dtor(struct page *page)
+{
+	pte_lock_deinit(page);
+	dec_zone_page_state(page, NR_PAGETABLE);
+}
+
 #define pte_offset_map_lock(mm, pmd, address, ptlp)	\
 ({							\
 	spinlock_t *__ptl = pte_lockptr(mm, pmd);	\
@@ -1136,7 +1148,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_ANON	0x08	/* give ZERO_PAGE if no pgtable */
 
-typedef int (*pte_fn_t)(pte_t *pte, struct page *pmd_page, unsigned long addr,
+typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
 			       unsigned long size, pte_fn_t fn, void *data);
diff --git a/mm/memory.c b/mm/memory.c
index 153a54b2013c..e5628a5fd678 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -134,11 +134,9 @@ void pmd_clear_bad(pmd_t *pmd)
  */
 static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
 {
-	struct page *page = pmd_page(*pmd);
+	pgtable_t token = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
-	pte_lock_deinit(page);
-	pte_free_tlb(tlb, page);
-	dec_zone_page_state(page, NR_PAGETABLE);
+	pte_free_tlb(tlb, token);
 	tlb->mm->nr_ptes--;
 }
 
@@ -309,21 +307,19 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
 
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 {
-	struct page *new = pte_alloc_one(mm, address);
+	pgtable_t new = pte_alloc_one(mm, address);
 	if (!new)
 		return -ENOMEM;
 
-	pte_lock_init(new);
 	spin_lock(&mm->page_table_lock);
-	if (pmd_present(*pmd)) {	/* Another has populated it */
-		pte_lock_deinit(new);
-		pte_free(mm, new);
-	} else {
+	if (!pmd_present(*pmd)) {	/* Has another populated it ? */
 		mm->nr_ptes++;
-		inc_zone_page_state(new, NR_PAGETABLE);
 		pmd_populate(mm, pmd, new);
+		new = NULL;
 	}
 	spin_unlock(&mm->page_table_lock);
+	if (new)
+		pte_free(mm, new);
 	return 0;
 }
 
@@ -334,11 +330,13 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 		return -ENOMEM;
 
 	spin_lock(&init_mm.page_table_lock);
-	if (pmd_present(*pmd))		/* Another has populated it */
-		pte_free_kernel(&init_mm, new);
-	else
+	if (!pmd_present(*pmd)) {	/* Has another populated it ? */
 		pmd_populate_kernel(&init_mm, pmd, new);
+		new = NULL;
+	}
 	spin_unlock(&init_mm.page_table_lock);
+	if (new)
+		pte_free_kernel(&init_mm, new);
 	return 0;
 }
 
@@ -1390,7 +1388,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
 	pte_t *pte;
 	int err;
-	struct page *pmd_page;
+	pgtable_t token;
 	spinlock_t *uninitialized_var(ptl);
 
 	pte = (mm == &init_mm) ?
@@ -1401,10 +1399,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 
 	BUG_ON(pmd_huge(*pmd));
 
-	pmd_page = pmd_page(*pmd);
+	token = pmd_pgtable(*pmd);
 
 	do {
-		err = fn(pte, pmd_page, addr, data);
+		err = fn(pte, token, addr, data);
 		if (err)
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0536dde139d1..950c0be9ca81 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -820,7 +820,7 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
 }
 
 
-static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
+static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
 {
 	/* apply_to_page_range() does all the hard work. */
 	return 0;
-- 
cgit 


From ea54b10c7773007e173da31fe7adcc049da33331 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 28 Jan 2008 10:40:59 +0200
Subject: IB/mlx4: Use multiple WQ blocks to post smaller send WQEs

ConnectX HCA supports shrinking WQEs, so that a single work request
can be made of multiple units of wqe_shift.  This way, WRs can differ
in size, and do not have to be a power of 2 in size, saving memory and
speeding up send WR posting.  Unfortunately, if we do this then the
wqe_index field in CQEs can't be used to look up the WR ID anymore, so
our implementation does this only if selective signaling is off.

Further, on 32-bit platforms, we can't use vmap() to make the QP
buffer virtually contigious. Thus we have to use constant-sized WRs to
make sure a WR is always fully within a single page-sized chunk.

Finally, we use WRs with the NOP opcode to avoid wrapping around the
queue buffer in the middle of posting a WR, and we set the
NoErrorCompletion bit to avoid getting completions with error for NOP
WRs.  However, NEC is only supported starting with firmware 2.2.232,
so we use constant-sized WRs for older firmware.  And, since MLX QPs
only support SEND, we use constant-sized WRs in this case.

When stamping during NOP posting, do stamping following setting of the
NOP WQE valid bit.

Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/cq.c      |  12 +-
 drivers/infiniband/hw/mlx4/mlx4_ib.h |   2 +
 drivers/infiniband/hw/mlx4/qp.c      | 210 +++++++++++++++++++++++++++++------
 include/linux/mlx4/device.h          |   5 +
 include/linux/mlx4/qp.h              |   4 +
 5 files changed, 197 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 8ac7b973f870..7360bbafbe84 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -326,6 +326,12 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
+	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
+		     is_send)) {
+		printk(KERN_WARNING "Completion for NOP opcode detected!\n");
+		return -EINVAL;
+	}
+
 	if (!*cur_qp ||
 	    (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
 		/*
@@ -348,8 +354,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
-		wqe_ctr = be16_to_cpu(cqe->wqe_index);
-		wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
+		if (!(*cur_qp)->sq_signal_bits) {
+			wqe_ctr = be16_to_cpu(cqe->wqe_index);
+			wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
+		}
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
 	} else if ((*cur_qp)->ibqp.srq) {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 28697653a370..3726e451a327 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -120,6 +120,8 @@ struct mlx4_ib_qp {
 
 	u32			doorbell_qpn;
 	__be32			sq_signal_bits;
+	unsigned		sq_next_wqe;
+	int			sq_max_wqes_per_wr;
 	int			sq_spare_wqes;
 	struct mlx4_ib_wq	sq;
 
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 376db730bc75..958e205b6d7c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -30,6 +30,8 @@
  * SOFTWARE.
  */
 
+#include <linux/log2.h>
+
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
 
@@ -111,16 +113,87 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
 
 /*
  * Stamp a SQ WQE so that it is invalid if prefetched by marking the
- * first four bytes of every 64 byte chunk with 0xffffffff, except for
- * the very first chunk of the WQE.
+ * first four bytes of every 64 byte chunk with
+ *     0x7FFFFFF | (invalid_ownership_value << 31).
+ *
+ * When the max work request size is less than or equal to the WQE
+ * basic block size, as an optimization, we can stamp all WQEs with
+ * 0xffffffff, and skip the very first chunk of each WQE.
  */
-static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
+static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
 {
-	u32 *wqe = get_send_wqe(qp, n);
+	u32 *wqe;
 	int i;
+	int s;
+	int ind;
+	void *buf;
+	__be32 stamp;
+
+	s = roundup(size, 1U << qp->sq.wqe_shift);
+	if (qp->sq_max_wqes_per_wr > 1) {
+		for (i = 0; i < s; i += 64) {
+			ind = (i >> qp->sq.wqe_shift) + n;
+			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
+						       cpu_to_be32(0xffffffff);
+			buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
+			wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
+			*wqe = stamp;
+		}
+	} else {
+		buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+		for (i = 64; i < s; i += 64) {
+			wqe = buf + i;
+			*wqe = 0xffffffff;
+		}
+	}
+}
+
+static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
+{
+	struct mlx4_wqe_ctrl_seg *ctrl;
+	struct mlx4_wqe_inline_seg *inl;
+	void *wqe;
+	int s;
+
+	ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+	s = sizeof(struct mlx4_wqe_ctrl_seg);
+
+	if (qp->ibqp.qp_type == IB_QPT_UD) {
+		struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
+		struct mlx4_av *av = (struct mlx4_av *)dgram->av;
+		memset(dgram, 0, sizeof *dgram);
+		av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
+		s += sizeof(struct mlx4_wqe_datagram_seg);
+	}
+
+	/* Pad the remainder of the WQE with an inline data segment. */
+	if (size > s) {
+		inl = wqe + s;
+		inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
+	}
+	ctrl->srcrb_flags = 0;
+	ctrl->fence_size = size / 16;
+	/*
+	 * Make sure descriptor is fully written before setting ownership bit
+	 * (because HW can start executing as soon as we do).
+	 */
+	wmb();
+
+	ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
+		(n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
 
-	for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
-		wqe[i] = 0xffffffff;
+	stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
+}
+
+/* Post NOP WQE to prevent wrap-around in the middle of WR */
+static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
+{
+	unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
+	if (unlikely(s < qp->sq_max_wqes_per_wr)) {
+		post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
+		ind += s;
+	}
+	return ind;
 }
 
 static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
@@ -237,6 +310,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
 {
+	int s;
+
 	/* Sanity check SQ size before proceeding */
 	if (cap->max_send_wr	 > dev->dev->caps.max_wqes  ||
 	    cap->max_send_sge	 > dev->dev->caps.max_sq_sg ||
@@ -252,20 +327,74 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
 		return -EINVAL;
 
-	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
-							sizeof (struct mlx4_wqe_data_seg),
-							cap->max_inline_data +
-							sizeof (struct mlx4_wqe_inline_seg)) +
-						    send_wqe_overhead(type)));
-	qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
-		sizeof (struct mlx4_wqe_data_seg);
+	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
+		cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
+		send_wqe_overhead(type);
 
 	/*
-	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-	 * allow HW to prefetch.
+	 * Hermon supports shrinking WQEs, such that a single work
+	 * request can include multiple units of 1 << wqe_shift.  This
+	 * way, work requests can differ in size, and do not have to
+	 * be a power of 2 in size, saving memory and speeding up send
+	 * WR posting.  Unfortunately, if we do this then the
+	 * wqe_index field in CQEs can't be used to look up the WR ID
+	 * anymore, so we do this only if selective signaling is off.
+	 *
+	 * Further, on 32-bit platforms, we can't use vmap() to make
+	 * the QP buffer virtually contigious.  Thus we have to use
+	 * constant-sized WRs to make sure a WR is always fully within
+	 * a single page-sized chunk.
+	 *
+	 * Finally, we use NOP work requests to pad the end of the
+	 * work queue, to avoid wrap-around in the middle of WR.  We
+	 * set NEC bit to avoid getting completions with error for
+	 * these NOP WRs, but since NEC is only supported starting
+	 * with firmware 2.2.232, we use constant-sized WRs for older
+	 * firmware.
+	 *
+	 * And, since MLX QPs only support SEND, we use constant-sized
+	 * WRs in this case.
+	 *
+	 * We look for the smallest value of wqe_shift such that the
+	 * resulting number of wqes does not exceed device
+	 * capabilities.
+	 *
+	 * We set WQE size to at least 64 bytes, this way stamping
+	 * invalidates each WQE.
 	 */
-	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
-	qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);
+	if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
+	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
+	    type != IB_QPT_SMI && type != IB_QPT_GSI)
+		qp->sq.wqe_shift = ilog2(64);
+	else
+		qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
+
+	for (;;) {
+		if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz)
+			return -EINVAL;
+
+		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
+
+		/*
+		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
+		 * allow HW to prefetch.
+		 */
+		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
+		qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
+						    qp->sq_max_wqes_per_wr +
+						    qp->sq_spare_wqes);
+
+		if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
+			break;
+
+		if (qp->sq_max_wqes_per_wr <= 1)
+			return -EINVAL;
+
+		++qp->sq.wqe_shift;
+	}
+
+	qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) -
+			 send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg);
 
 	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
 		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
@@ -277,7 +406,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 		qp->sq.offset = 0;
 	}
 
-	cap->max_send_wr  = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
+	cap->max_send_wr  = qp->sq.max_post =
+		(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
 	cap->max_send_sge = qp->sq.max_gs;
 	/* We don't support inline sends for kernel QPs (yet) */
 	cap->max_inline_data = 0;
@@ -323,6 +453,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	qp->rq.tail	    = 0;
 	qp->sq.head	    = 0;
 	qp->sq.tail	    = 0;
+	qp->sq_next_wqe     = 0;
+
+	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
+	else
+		qp->sq_signal_bits = 0;
 
 	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
 	if (err)
@@ -413,11 +549,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	 */
 	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
 
-	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
-		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-	else
-		qp->sq_signal_bits = 0;
-
 	qp->mqp.event = mlx4_ib_qp_event;
 
 	return 0;
@@ -912,7 +1043,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 			ctrl = get_send_wqe(qp, i);
 			ctrl->owner_opcode = cpu_to_be32(1 << 31);
 
-			stamp_send_wqe(qp, i);
+			stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
 		}
 	}
 
@@ -965,6 +1096,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 		qp->rq.tail = 0;
 		qp->sq.head = 0;
 		qp->sq.tail = 0;
+		qp->sq_next_wqe = 0;
 		if (!ibqp->srq)
 			*qp->db.db  = 0;
 	}
@@ -1274,13 +1406,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	unsigned long flags;
 	int nreq;
 	int err = 0;
-	int ind;
-	int size;
+	unsigned ind;
+	int uninitialized_var(stamp);
+	int uninitialized_var(size);
 	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
-	ind = qp->sq.head;
+	ind = qp->sq_next_wqe;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
@@ -1296,7 +1429,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		}
 
 		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
-		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
+		qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
 
 		ctrl->srcrb_flags =
 			(wr->send_flags & IB_SEND_SIGNALED ?
@@ -1409,16 +1542,23 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
 			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
 
+		stamp = ind + qp->sq_spare_wqes;
+		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
+
 		/*
 		 * We can improve latency by not stamping the last
 		 * send queue WQE until after ringing the doorbell, so
 		 * only stamp here if there are still more WQEs to post.
+		 *
+		 * Same optimization applies to padding with NOP wqe
+		 * in case of WQE shrinking (used to prevent wrap-around
+		 * in the middle of WR).
 		 */
-		if (wr->next)
-			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
-				       (qp->sq.wqe_cnt - 1));
+		if (wr->next) {
+			stamp_send_wqe(qp, stamp, size * 16);
+			ind = pad_wraparound(qp, ind);
+		}
 
-		++ind;
 	}
 
 out:
@@ -1440,8 +1580,10 @@ out:
 		 */
 		mmiowb();
 
-		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
-			       (qp->sq.wqe_cnt - 1));
+		stamp_send_wqe(qp, stamp, size * 16);
+
+		ind = pad_wraparound(qp, ind);
+		qp->sq_next_wqe = ind;
 	}
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 4210ac4a8bcd..6cdf813cd478 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -133,6 +133,11 @@ enum {
 	MLX4_STAT_RATE_OFFSET	= 5
 };
 
+static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
+{
+	return (major << 32) | (minor << 16) | subminor;
+}
+
 struct mlx4_caps {
 	u64			fw_ver;
 	int			num_ports;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 3968b943259a..09a2230923f2 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -154,7 +154,11 @@ struct mlx4_qp_context {
 	u32			reserved5[10];
 };
 
+/* Which firmware version adds support for NEC (NoErrorCompletion) bit */
+#define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
+
 enum {
+	MLX4_WQE_CTRL_NEC	= 1 << 29,
 	MLX4_WQE_CTRL_FENCE	= 1 << 6,
 	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
 	MLX4_WQE_CTRL_SOLICITED	= 1 << 1,
-- 
cgit 


From 765cdb6cef63c0b41c3f6c9285769080b3f41bb0 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Fri, 8 Feb 2008 15:00:49 -0800
Subject: DCA: convert struct class_device to struct device.

Thanks to Kay for keeping us honest.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Cc: "Williams, Dan J" <dan.j.williams@intel.com>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/dca/dca-sysfs.c | 15 +++++++--------
 include/linux/dca.h     |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c
index 24a263b6844c..011328faa5f2 100644
--- a/drivers/dca/dca-sysfs.c
+++ b/drivers/dca/dca-sysfs.c
@@ -12,10 +12,10 @@ static spinlock_t dca_idr_lock;
 
 int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot)
 {
-	struct class_device *cd;
+	struct device *cd;
 
-	cd = class_device_create(dca_class, dca->cd, MKDEV(0, slot + 1),
-				 dev, "requester%d", slot);
+	cd = device_create(dca_class, dca->cd, MKDEV(0, slot + 1),
+			   "requester%d", slot);
 	if (IS_ERR(cd))
 		return PTR_ERR(cd);
 	return 0;
@@ -23,12 +23,12 @@ int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot)
 
 void dca_sysfs_remove_req(struct dca_provider *dca, int slot)
 {
-	class_device_destroy(dca_class, MKDEV(0, slot + 1));
+	device_destroy(dca_class, MKDEV(0, slot + 1));
 }
 
 int dca_sysfs_add_provider(struct dca_provider *dca, struct device *dev)
 {
-	struct class_device *cd;
+	struct device *cd;
 	int err = 0;
 
 idr_try_again:
@@ -46,8 +46,7 @@ idr_try_again:
 		return err;
 	}
 
-	cd = class_device_create(dca_class, NULL, MKDEV(0, 0),
-				 dev, "dca%d", dca->id);
+	cd = device_create(dca_class, dev, MKDEV(0, 0), "dca%d", dca->id);
 	if (IS_ERR(cd)) {
 		spin_lock(&dca_idr_lock);
 		idr_remove(&dca_idr, dca->id);
@@ -60,7 +59,7 @@ idr_try_again:
 
 void dca_sysfs_remove_provider(struct dca_provider *dca)
 {
-	class_device_unregister(dca->cd);
+	device_unregister(dca->cd);
 	dca->cd = NULL;
 	spin_lock(&dca_idr_lock);
 	idr_remove(&dca_idr, dca->id);
diff --git a/include/linux/dca.h b/include/linux/dca.h
index 83eaecc6f8ab..af61cd1f37e9 100644
--- a/include/linux/dca.h
+++ b/include/linux/dca.h
@@ -11,7 +11,7 @@ void dca_unregister_notify(struct notifier_block *nb);
 
 struct dca_provider {
 	struct dca_ops		*ops;
-	struct class_device 	*cd;
+	struct device 		*cd;
 	int			 id;
 };
 
-- 
cgit 


From a0dd25b2c83de4623487ca4de9c1d962b552ca0f Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 9 Feb 2008 04:01:48 -0500
Subject: ACPI: thermal: buildfix for CONFIG_THERMAL=n

This fixes the build, but acpi_fan_add() still needs
to be updated to handle thermal_cooling_device_register()
returning NULL as a non-fatal condition.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/thermal.h | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index bba7712cadc7..818ca1cf0b6d 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -79,7 +79,9 @@ struct thermal_zone_device {
 };
 
 struct thermal_zone_device *thermal_zone_device_register(char *, int, void *,
-					struct thermal_zone_device_ops *);
+							 struct
+							 thermal_zone_device_ops
+							 *);
 void thermal_zone_device_unregister(struct thermal_zone_device *);
 
 int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
@@ -87,8 +89,23 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
 				       struct thermal_cooling_device *);
 
+#ifdef	CONFIG_THERMAL
 struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
-					struct thermal_cooling_device_ops *);
+							       struct
+							       thermal_cooling_device_ops
+							       *);
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
+#else
+static inline struct thermal_cooling_device
+*thermal_cooling_device_register(char *c, void *v,
+				 struct thermal_cooling_device_ops *t)
+{
+	return NULL;
+}
+static inline
+    void thermal_cooling_device_unregister(struct thermal_cooling_device *t)
+{
+};
+#endif
 
-#endif				/* __THERMAL_H__ */
+#endif /* __THERMAL_H__ */
-- 
cgit 


From 880cdf3a8122288d37829ce01eadf8822bb386db Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Sat, 9 Feb 2008 00:10:12 -0800
Subject: Fix compile error on nommu for is_swap_pte

  CC      mm/vmscan.o
In file included from
/home/bunk/linux/kernel-2.6/git/linux-2.6/mm/vmscan.c:44:
/home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/swapops.h: In function 'is_swap_pte':
/home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/swapops.h:48: error: implicit declaration of function 'pte_none'
/home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/swapops.h:48: error: implicit declaration of function 'pte_present'

Does it ever make sense to ask "is this pte a swap entry?" on a machine
with no MMU?  Presumably this also means it has no ptes too, right?  In
which case, it's better to comment the whole function out.  Then when
someone tries to ask the above meaningless question, they get a compile
error rather than a meaningless answer.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Mike Frysinger <vapier@gentoo.org>
Reported-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 7bf2d149d209..6ec39ab27b4b 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -42,11 +42,13 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
 	return entry.val & SWP_OFFSET_MASK(entry);
 }
 
+#ifdef CONFIG_MMU
 /* check whether a pte points to a swap entry */
 static inline int is_swap_pte(pte_t pte)
 {
 	return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
 }
+#endif
 
 /*
  * Convert the arch-dependent pte representation of a swp_entry_t into an
-- 
cgit 


From 60c12b1202a60eabb1c61317e5d2678fcea9893f Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Sat, 9 Feb 2008 00:10:15 -0800
Subject: memcontrol: add vm_match_cgroup()

mm_cgroup() is exclusively used to test whether an mm's mem_cgroup pointer
is pointing to a specific cgroup.  Instead of returning the pointer, we can
just do the test itself in a new macro:

	vm_match_cgroup(mm, cgroup)

returns non-zero if the mm's mem_cgroup points to cgroup.  Otherwise it
returns zero.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ++++------
 mm/memcontrol.c            |  2 +-
 mm/rmap.c                  |  4 ++--
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9815951ec995..925d57b236aa 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -51,10 +51,8 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
-static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
-{
-	return rcu_dereference(mm->mem_cgroup);
-}
+#define vm_match_cgroup(mm, cgroup)	\
+	((cgroup) == rcu_dereference((mm)->mem_cgroup))
 
 extern int mem_cgroup_prepare_migration(struct page *page);
 extern void mem_cgroup_end_migration(struct page *page);
@@ -123,9 +121,9 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
-static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
+static inline int vm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
 {
-	return NULL;
+	return 1;
 }
 
 static inline int task_in_mem_cgroup(struct task_struct *task,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5c2c702af617..6bded84c20c8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -399,7 +399,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 	int ret;
 
 	task_lock(task);
-	ret = task->mm && mm_cgroup(task->mm) == mem;
+	ret = task->mm && vm_match_cgroup(task->mm, mem);
 	task_unlock(task);
 	return ret;
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index a0e92a263d12..8fd527c4e2bf 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -321,7 +321,7 @@ static int page_referenced_anon(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont))
+		if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
 		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
@@ -382,7 +382,7 @@ static int page_referenced_file(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont))
+		if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
 		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
 				  == (VM_LOCKED|VM_MAYSHARE)) {
-- 
cgit 


From baf8532a147d5b76681ce040e2c8f25a3f0e718d Mon Sep 17 00:00:00 2001
From: Alex Dubov <oakad@yahoo.com>
Date: Sat, 9 Feb 2008 10:20:54 -0800
Subject: memstick: initial commit for Sony MemoryStick support

Sony MemoryStick cards are used in many products manufactured by Sony.
They are available both as storage and as IO expansion cards.  Currently,
only MemoryStick Pro storage cards are supported via TI FlashMedia
MemoryStick interface.

[mboton@gmail.com: biuld fix]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alex Dubov <oakad@yahoo.com>
Signed-off-by: Miguel Boton <mboton@gmail.co>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS                         |    7 +
 drivers/Kconfig                     |    2 +
 drivers/Makefile                    |    1 +
 drivers/memstick/Kconfig            |   26 +
 drivers/memstick/Makefile           |   11 +
 drivers/memstick/core/Kconfig       |   26 +
 drivers/memstick/core/Makefile      |   11 +
 drivers/memstick/core/memstick.c    |  614 ++++++++++++++++
 drivers/memstick/core/mspro_block.c | 1351 +++++++++++++++++++++++++++++++++++
 drivers/memstick/host/Kconfig       |   22 +
 drivers/memstick/host/Makefile      |   10 +
 drivers/memstick/host/tifm_ms.c     |  685 ++++++++++++++++++
 drivers/misc/tifm_7xx1.c            |   17 +
 drivers/misc/tifm_core.c            |    7 +
 include/linux/memstick.h            |  299 ++++++++
 include/linux/tifm.h                |    4 +
 16 files changed, 3093 insertions(+)
 create mode 100644 drivers/memstick/Kconfig
 create mode 100644 drivers/memstick/Makefile
 create mode 100644 drivers/memstick/core/Kconfig
 create mode 100644 drivers/memstick/core/Makefile
 create mode 100644 drivers/memstick/core/memstick.c
 create mode 100644 drivers/memstick/core/mspro_block.c
 create mode 100644 drivers/memstick/host/Kconfig
 create mode 100644 drivers/memstick/host/Makefile
 create mode 100644 drivers/memstick/host/tifm_ms.c
 create mode 100644 include/linux/memstick.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 0d6f5119a6da..5740aa216e11 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3627,6 +3627,13 @@ L:	linux-acpi@vger.kernel.org
 W:	http://www.linux.it/~malattia/wiki/index.php/Sony_drivers
 S:	Maintained
 
+SONY MEMORYSTICK CARD SUPPORT
+P:	Alex Dubov
+M:	oakad@yahoo.com
+L:	linux-kernel@vger.kernel.org
+W:	http://tifmxx.berlios.de/
+S:	Maintained
+
 SOUND
 P:	Jaroslav Kysela
 M:	perex@perex.cz
diff --git a/drivers/Kconfig b/drivers/Kconfig
index b86877bdc7ac..3a0e3549739f 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -80,6 +80,8 @@ source "drivers/usb/Kconfig"
 
 source "drivers/mmc/Kconfig"
 
+source "drivers/memstick/Kconfig"
+
 source "drivers/leds/Kconfig"
 
 source "drivers/infiniband/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 30ba97ec5eb5..e5e394a7e6c0 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -78,6 +78,7 @@ obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_MMC)		+= mmc/
+obj-$(CONFIG_MEMSTICK)		+= memstick/
 obj-$(CONFIG_NEW_LEDS)		+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
 obj-$(CONFIG_SGI_SN)		+= sn/
diff --git a/drivers/memstick/Kconfig b/drivers/memstick/Kconfig
new file mode 100644
index 000000000000..1093fdb07297
--- /dev/null
+++ b/drivers/memstick/Kconfig
@@ -0,0 +1,26 @@
+#
+# MemoryStick subsystem configuration
+#
+
+menuconfig MEMSTICK
+	tristate "Sony MemoryStick card support (EXPERIMENTAL)"
+	help
+	  Sony MemoryStick is a proprietary storage/extension card protocol.
+
+	  If you want MemoryStick support, you should say Y here and also
+	  to the specific driver for your MMC interface.
+
+if MEMSTICK
+
+config MEMSTICK_DEBUG
+	bool "MemoryStick debugging"
+	help
+	  This is an option for use by developers; most people should
+	  say N here.  This enables MemoryStick core and driver debugging.
+
+
+source "drivers/memstick/core/Kconfig"
+
+source "drivers/memstick/host/Kconfig"
+
+endif # MEMSTICK
diff --git a/drivers/memstick/Makefile b/drivers/memstick/Makefile
new file mode 100644
index 000000000000..dc160fb43515
--- /dev/null
+++ b/drivers/memstick/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the kernel MemoryStick device drivers.
+#
+
+ifeq ($(CONFIG_MEMSTICK_DEBUG),y)
+	EXTRA_CFLAGS		+= -DDEBUG
+endif
+
+obj-$(CONFIG_MEMSTICK)		+= core/
+obj-$(CONFIG_MEMSTICK)		+= host/
+
diff --git a/drivers/memstick/core/Kconfig b/drivers/memstick/core/Kconfig
new file mode 100644
index 000000000000..95f1814b5368
--- /dev/null
+++ b/drivers/memstick/core/Kconfig
@@ -0,0 +1,26 @@
+#
+# MemoryStick core configuration
+#
+
+comment "MemoryStick drivers"
+
+config MEMSTICK_UNSAFE_RESUME
+        bool "Allow unsafe resume (DANGEROUS)"
+        help
+          If you say Y here, the MemoryStick layer will assume that all
+          cards stayed in their respective slots during the suspend. The
+          normal behaviour is to remove them at suspend and
+          redetecting them at resume. Breaking this assumption will
+          in most cases result in data corruption.
+
+          This option is usually just for embedded systems which use
+          a MemoryStick card for rootfs. Most people should say N here.
+
+config MSPRO_BLOCK
+	tristate "MemoryStick Pro block device driver"
+	depends on BLOCK
+	help
+	  Say Y here to enable the MemoryStick Pro block device driver
+	  support. This provides a block device driver, which you can use
+	  to mount the filesystem. Almost everyone wishing MemoryStick
+	  support should say Y or M here.
diff --git a/drivers/memstick/core/Makefile b/drivers/memstick/core/Makefile
new file mode 100644
index 000000000000..8b2b5293877e
--- /dev/null
+++ b/drivers/memstick/core/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the kernel MemoryStick core.
+#
+
+ifeq ($(CONFIG_MEMSTICK_DEBUG),y)
+	EXTRA_CFLAGS		+= -DDEBUG
+endif
+
+obj-$(CONFIG_MEMSTICK)		+= memstick.o
+
+obj-$(CONFIG_MSPRO_BLOCK)	+= mspro_block.o
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
new file mode 100644
index 000000000000..bba467fe4bce
--- /dev/null
+++ b/drivers/memstick/core/memstick.c
@@ -0,0 +1,614 @@
+/*
+ *  Sony MemoryStick support
+ *
+ *  Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Special thanks to Carlos Corbacho for providing various MemoryStick cards
+ * that made this driver possible.
+ *
+ */
+
+#include <linux/memstick.h>
+#include <linux/idr.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+
+#define DRIVER_NAME "memstick"
+#define DRIVER_VERSION "0.2"
+
+static unsigned int cmd_retries = 3;
+module_param(cmd_retries, uint, 0644);
+
+static struct workqueue_struct *workqueue;
+static DEFINE_IDR(memstick_host_idr);
+static DEFINE_SPINLOCK(memstick_host_lock);
+
+static int memstick_dev_match(struct memstick_dev *card,
+			      struct memstick_device_id *id)
+{
+	if (id->match_flags & MEMSTICK_MATCH_ALL) {
+		if ((id->type == card->id.type)
+		    && (id->category == card->id.category)
+		    && (id->class == card->id.class))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int memstick_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,
+						 dev);
+	struct memstick_driver *ms_drv = container_of(drv,
+						      struct memstick_driver,
+						      driver);
+	struct memstick_device_id *ids = ms_drv->id_table;
+
+	if (ids) {
+		while (ids->match_flags) {
+			if (memstick_dev_match(card, ids))
+				return 1;
+			++ids;
+		}
+	}
+	return 0;
+}
+
+static int memstick_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,
+						  dev);
+
+	if (add_uevent_var(env, "MEMSTICK_TYPE=%02X", card->id.type))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MEMSTICK_CATEGORY=%02X", card->id.category))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MEMSTICK_CLASS=%02X", card->id.class))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int memstick_device_probe(struct device *dev)
+{
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,
+						 dev);
+	struct memstick_driver *drv = container_of(dev->driver,
+						   struct memstick_driver,
+						   driver);
+	int rc = -ENODEV;
+
+	if (dev->driver && drv->probe) {
+		rc = drv->probe(card);
+		if (!rc)
+			get_device(dev);
+	}
+	return rc;
+}
+
+static int memstick_device_remove(struct device *dev)
+{
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,
+						  dev);
+	struct memstick_driver *drv = container_of(dev->driver,
+						   struct memstick_driver,
+						   driver);
+
+	if (dev->driver && drv->remove) {
+		drv->remove(card);
+		card->dev.driver = NULL;
+	}
+
+	put_device(dev);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int memstick_device_suspend(struct device *dev, pm_message_t state)
+{
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,
+						  dev);
+	struct memstick_driver *drv = container_of(dev->driver,
+						   struct memstick_driver,
+						   driver);
+
+	if (dev->driver && drv->suspend)
+		return drv->suspend(card, state);
+	return 0;
+}
+
+static int memstick_device_resume(struct device *dev)
+{
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,
+						  dev);
+	struct memstick_driver *drv = container_of(dev->driver,
+						   struct memstick_driver,
+						   driver);
+
+	if (dev->driver && drv->resume)
+		return drv->resume(card);
+	return 0;
+}
+
+#else
+
+#define memstick_device_suspend NULL
+#define memstick_device_resume NULL
+
+#endif /* CONFIG_PM */
+
+#define MEMSTICK_ATTR(name, format)                                           \
+static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \
+			    char *buf)                                        \
+{                                                                             \
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,    \
+						 dev);                        \
+	return sprintf(buf, format, card->id.name);                           \
+}
+
+MEMSTICK_ATTR(type, "%02X");
+MEMSTICK_ATTR(category, "%02X");
+MEMSTICK_ATTR(class, "%02X");
+
+#define MEMSTICK_ATTR_RO(name) __ATTR(name, S_IRUGO, name##_show, NULL)
+
+static struct device_attribute memstick_dev_attrs[] = {
+	MEMSTICK_ATTR_RO(type),
+	MEMSTICK_ATTR_RO(category),
+	MEMSTICK_ATTR_RO(class),
+	__ATTR_NULL
+};
+
+static struct bus_type memstick_bus_type = {
+	.name           = "memstick",
+	.dev_attrs      = memstick_dev_attrs,
+	.match          = memstick_bus_match,
+	.uevent         = memstick_uevent,
+	.probe          = memstick_device_probe,
+	.remove         = memstick_device_remove,
+	.suspend        = memstick_device_suspend,
+	.resume         = memstick_device_resume
+};
+
+static void memstick_free(struct class_device *cdev)
+{
+	struct memstick_host *host = container_of(cdev, struct memstick_host,
+						  cdev);
+	kfree(host);
+}
+
+static struct class memstick_host_class = {
+	.name       = "memstick_host",
+	.release    = memstick_free
+};
+
+static void memstick_free_card(struct device *dev)
+{
+	struct memstick_dev *card = container_of(dev, struct memstick_dev,
+						 dev);
+	kfree(card);
+}
+
+static int memstick_dummy_check(struct memstick_dev *card)
+{
+	return 0;
+}
+
+/**
+ * memstick_detect_change - schedule media detection on memstick host
+ * @host - host to use
+ */
+void memstick_detect_change(struct memstick_host *host)
+{
+	queue_work(workqueue, &host->media_checker);
+}
+EXPORT_SYMBOL(memstick_detect_change);
+
+/**
+ * memstick_next_req - called by host driver to obtain next request to process
+ * @host - host to use
+ * @mrq - pointer to stick the request to
+ *
+ * Host calls this function from idle state (*mrq == NULL) or after finishing
+ * previous request (*mrq should point to it). If previous request was
+ * unsuccessful, it is retried for predetermined number of times. Return value
+ * of 0 means that new request was assigned to the host.
+ */
+int memstick_next_req(struct memstick_host *host, struct memstick_request **mrq)
+{
+	int rc = -ENXIO;
+
+	if ((*mrq) && (*mrq)->error && host->retries) {
+		(*mrq)->error = rc;
+		host->retries--;
+		return 0;
+	}
+
+	if (host->card && host->card->next_request)
+		rc = host->card->next_request(host->card, mrq);
+
+	if (!rc)
+		host->retries = cmd_retries;
+	else
+		*mrq = NULL;
+
+	return rc;
+}
+EXPORT_SYMBOL(memstick_next_req);
+
+/**
+ * memstick_new_req - notify the host that some requests are pending
+ * @host - host to use
+ */
+void memstick_new_req(struct memstick_host *host)
+{
+	host->retries = cmd_retries;
+	host->request(host);
+}
+EXPORT_SYMBOL(memstick_new_req);
+
+/**
+ * memstick_init_req_sg - set request fields needed for bulk data transfer
+ * @mrq - request to use
+ * @tpc - memstick Transport Protocol Command
+ * @sg - TPC argument
+ */
+void memstick_init_req_sg(struct memstick_request *mrq, unsigned char tpc,
+			  struct scatterlist *sg)
+{
+	mrq->tpc = tpc;
+	if (tpc & 8)
+		mrq->data_dir = WRITE;
+	else
+		mrq->data_dir = READ;
+
+	mrq->sg = *sg;
+	mrq->io_type = MEMSTICK_IO_SG;
+
+	if (tpc == MS_TPC_SET_CMD || tpc == MS_TPC_EX_SET_CMD)
+		mrq->need_card_int = 1;
+	else
+		mrq->need_card_int = 0;
+
+	mrq->get_int_reg = 0;
+}
+EXPORT_SYMBOL(memstick_init_req_sg);
+
+/**
+ * memstick_init_req - set request fields needed for short data transfer
+ * @mrq - request to use
+ * @tpc - memstick Transport Protocol Command
+ * @buf - TPC argument buffer
+ * @length - TPC argument size
+ *
+ * The intended use of this function (transfer of data items several bytes
+ * in size) allows us to just copy the value between request structure and
+ * user supplied buffer.
+ */
+void memstick_init_req(struct memstick_request *mrq, unsigned char tpc,
+		       void *buf, size_t length)
+{
+	mrq->tpc = tpc;
+	if (tpc & 8)
+		mrq->data_dir = WRITE;
+	else
+		mrq->data_dir = READ;
+
+	mrq->data_len = length > sizeof(mrq->data) ? sizeof(mrq->data) : length;
+	if (mrq->data_dir == WRITE)
+		memcpy(mrq->data, buf, mrq->data_len);
+
+	mrq->io_type = MEMSTICK_IO_VAL;
+
+	if (tpc == MS_TPC_SET_CMD || tpc == MS_TPC_EX_SET_CMD)
+		mrq->need_card_int = 1;
+	else
+		mrq->need_card_int = 0;
+
+	mrq->get_int_reg = 0;
+}
+EXPORT_SYMBOL(memstick_init_req);
+
+/*
+ * Functions prefixed with "h_" are protocol callbacks. They can be called from
+ * interrupt context. Return value of 0 means that request processing is still
+ * ongoing, while special error value of -EAGAIN means that current request is
+ * finished (and request processor should come back some time later).
+ */
+
+static int h_memstick_read_dev_id(struct memstick_dev *card,
+				  struct memstick_request **mrq)
+{
+	struct ms_id_register id_reg;
+
+	if (!(*mrq)) {
+		memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, NULL,
+				  sizeof(struct ms_id_register));
+		*mrq = &card->current_mrq;
+		return 0;
+	} else {
+		if (!(*mrq)->error) {
+			memcpy(&id_reg, (*mrq)->data, sizeof(id_reg));
+			card->id.match_flags = MEMSTICK_MATCH_ALL;
+			card->id.type = id_reg.type;
+			card->id.category = id_reg.category;
+			card->id.class = id_reg.class;
+		}
+		complete(&card->mrq_complete);
+		return -EAGAIN;
+	}
+}
+
+static int h_memstick_set_rw_addr(struct memstick_dev *card,
+				  struct memstick_request **mrq)
+{
+	if (!(*mrq)) {
+		memstick_init_req(&card->current_mrq, MS_TPC_SET_RW_REG_ADRS,
+				  (char *)&card->reg_addr,
+				  sizeof(card->reg_addr));
+		*mrq = &card->current_mrq;
+		return 0;
+	} else {
+		complete(&card->mrq_complete);
+		return -EAGAIN;
+	}
+}
+
+/**
+ * memstick_set_rw_addr - issue SET_RW_REG_ADDR request and wait for it to
+ *                        complete
+ * @card - media device to use
+ */
+int memstick_set_rw_addr(struct memstick_dev *card)
+{
+	card->next_request = h_memstick_set_rw_addr;
+	memstick_new_req(card->host);
+	wait_for_completion(&card->mrq_complete);
+
+	return card->current_mrq.error;
+}
+EXPORT_SYMBOL(memstick_set_rw_addr);
+
+static struct memstick_dev *memstick_alloc_card(struct memstick_host *host)
+{
+	struct memstick_dev *card = kzalloc(sizeof(struct memstick_dev),
+					    GFP_KERNEL);
+	struct memstick_dev *old_card = host->card;
+	struct ms_id_register id_reg;
+
+	if (card) {
+		card->host = host;
+		snprintf(card->dev.bus_id, sizeof(card->dev.bus_id),
+			 "%s", host->cdev.class_id);
+		card->dev.parent = host->cdev.dev;
+		card->dev.bus = &memstick_bus_type;
+		card->dev.release = memstick_free_card;
+		card->check = memstick_dummy_check;
+
+		card->reg_addr.r_offset = offsetof(struct ms_register, id);
+		card->reg_addr.r_length = sizeof(id_reg);
+		card->reg_addr.w_offset = offsetof(struct ms_register, id);
+		card->reg_addr.w_length = sizeof(id_reg);
+
+		init_completion(&card->mrq_complete);
+
+		host->card = card;
+		if (memstick_set_rw_addr(card))
+			goto err_out;
+
+		card->next_request = h_memstick_read_dev_id;
+		memstick_new_req(host);
+		wait_for_completion(&card->mrq_complete);
+
+		if (card->current_mrq.error)
+			goto err_out;
+	}
+	host->card = old_card;
+	return card;
+err_out:
+	host->card = old_card;
+	kfree(card);
+	return NULL;
+}
+
+static void memstick_power_on(struct memstick_host *host)
+{
+	host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_ON);
+	host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_SERIAL);
+	msleep(1);
+}
+
+static void memstick_check(struct work_struct *work)
+{
+	struct memstick_host *host = container_of(work, struct memstick_host,
+						  media_checker);
+	struct memstick_dev *card;
+
+	dev_dbg(host->cdev.dev, "memstick_check started\n");
+	mutex_lock(&host->lock);
+	if (!host->card)
+		memstick_power_on(host);
+
+	card = memstick_alloc_card(host);
+
+	if (!card) {
+		if (host->card) {
+			device_unregister(&host->card->dev);
+			host->card = NULL;
+		}
+	} else {
+		dev_dbg(host->cdev.dev, "new card %02x, %02x, %02x\n",
+			card->id.type, card->id.category, card->id.class);
+		if (host->card) {
+			if (memstick_set_rw_addr(host->card)
+			    || !memstick_dev_match(host->card, &card->id)
+			    || !(host->card->check(host->card))) {
+				device_unregister(&host->card->dev);
+				host->card = NULL;
+			}
+		}
+
+		if (!host->card) {
+			host->card = card;
+			if (device_register(&card->dev)) {
+				kfree(host->card);
+				host->card = NULL;
+			}
+		} else
+			kfree(card);
+	}
+
+	if (!host->card)
+		host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF);
+
+	mutex_unlock(&host->lock);
+	dev_dbg(host->cdev.dev, "memstick_check finished\n");
+}
+
+/**
+ * memstick_alloc_host - allocate a memstick_host structure
+ * @extra: size of the user private data to allocate
+ * @dev: parent device of the host
+ */
+struct memstick_host *memstick_alloc_host(unsigned int extra,
+					  struct device *dev)
+{
+	struct memstick_host *host;
+
+	host = kzalloc(sizeof(struct memstick_host) + extra, GFP_KERNEL);
+	if (host) {
+		mutex_init(&host->lock);
+		INIT_WORK(&host->media_checker, memstick_check);
+		host->cdev.class = &memstick_host_class;
+		host->cdev.dev = dev;
+		class_device_initialize(&host->cdev);
+	}
+	return host;
+}
+EXPORT_SYMBOL(memstick_alloc_host);
+
+/**
+ * memstick_add_host - start request processing on memstick host
+ * @host - host to use
+ */
+int memstick_add_host(struct memstick_host *host)
+{
+	int rc;
+
+	if (!idr_pre_get(&memstick_host_idr, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(&memstick_host_lock);
+	rc = idr_get_new(&memstick_host_idr, host, &host->id);
+	spin_unlock(&memstick_host_lock);
+	if (rc)
+		return rc;
+
+	snprintf(host->cdev.class_id, BUS_ID_SIZE,
+		 "memstick%u", host->id);
+
+	rc = class_device_add(&host->cdev);
+	if (rc) {
+		spin_lock(&memstick_host_lock);
+		idr_remove(&memstick_host_idr, host->id);
+		spin_unlock(&memstick_host_lock);
+		return rc;
+	}
+
+	host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF);
+	memstick_detect_change(host);
+	return 0;
+}
+EXPORT_SYMBOL(memstick_add_host);
+
+/**
+ * memstick_remove_host - stop request processing on memstick host
+ * @host - host to use
+ */
+void memstick_remove_host(struct memstick_host *host)
+{
+	flush_workqueue(workqueue);
+	mutex_lock(&host->lock);
+	if (host->card)
+		device_unregister(&host->card->dev);
+	host->card = NULL;
+	host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF);
+	mutex_unlock(&host->lock);
+
+	spin_lock(&memstick_host_lock);
+	idr_remove(&memstick_host_idr, host->id);
+	spin_unlock(&memstick_host_lock);
+	class_device_del(&host->cdev);
+}
+EXPORT_SYMBOL(memstick_remove_host);
+
+/**
+ * memstick_free_host - free memstick host
+ * @host - host to use
+ */
+void memstick_free_host(struct memstick_host *host)
+{
+	mutex_destroy(&host->lock);
+	class_device_put(&host->cdev);
+}
+EXPORT_SYMBOL(memstick_free_host);
+
+int memstick_register_driver(struct memstick_driver *drv)
+{
+	drv->driver.bus = &memstick_bus_type;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(memstick_register_driver);
+
+void memstick_unregister_driver(struct memstick_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(memstick_unregister_driver);
+
+
+static int __init memstick_init(void)
+{
+	int rc;
+
+	workqueue = create_freezeable_workqueue("kmemstick");
+	if (!workqueue)
+		return -ENOMEM;
+
+	rc = bus_register(&memstick_bus_type);
+	if (!rc)
+		rc = class_register(&memstick_host_class);
+
+	if (!rc)
+		return 0;
+
+	bus_unregister(&memstick_bus_type);
+	destroy_workqueue(workqueue);
+
+	return rc;
+}
+
+static void __exit memstick_exit(void)
+{
+	class_unregister(&memstick_host_class);
+	bus_unregister(&memstick_bus_type);
+	destroy_workqueue(workqueue);
+	idr_destroy(&memstick_host_idr);
+}
+
+module_init(memstick_init);
+module_exit(memstick_exit);
+
+MODULE_AUTHOR("Alex Dubov");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Sony MemoryStick core driver");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
new file mode 100644
index 000000000000..423ad8cf4bb9
--- /dev/null
+++ b/drivers/memstick/core/mspro_block.c
@@ -0,0 +1,1351 @@
+/*
+ *  Sony MemoryStick Pro storage support
+ *
+ *  Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Special thanks to Carlos Corbacho for providing various MemoryStick cards
+ * that made this driver possible.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/idr.h>
+#include <linux/hdreg.h>
+#include <linux/kthread.h>
+#include <linux/memstick.h>
+
+#define DRIVER_NAME "mspro_block"
+#define DRIVER_VERSION "0.2"
+
+static int major;
+module_param(major, int, 0644);
+
+#define MSPRO_BLOCK_MAX_SEGS  32
+#define MSPRO_BLOCK_MAX_PAGES ((2 << 16) - 1)
+
+#define MSPRO_BLOCK_SIGNATURE        0xa5c3
+#define MSPRO_BLOCK_MAX_ATTRIBUTES   41
+
+enum {
+	MSPRO_BLOCK_ID_SYSINFO         = 0x10,
+	MSPRO_BLOCK_ID_MODELNAME       = 0x15,
+	MSPRO_BLOCK_ID_MBR             = 0x20,
+	MSPRO_BLOCK_ID_PBR16           = 0x21,
+	MSPRO_BLOCK_ID_PBR32           = 0x22,
+	MSPRO_BLOCK_ID_SPECFILEVALUES1 = 0x25,
+	MSPRO_BLOCK_ID_SPECFILEVALUES2 = 0x26,
+	MSPRO_BLOCK_ID_DEVINFO         = 0x30
+};
+
+struct mspro_sys_attr {
+	size_t                  size;
+	void                    *data;
+	unsigned char           id;
+	char                    name[32];
+	struct device_attribute dev_attr;
+};
+
+struct mspro_attr_entry {
+	unsigned int  address;
+	unsigned int  size;
+	unsigned char id;
+	unsigned char reserved[3];
+} __attribute__((packed));
+
+struct mspro_attribute {
+	unsigned short          signature;
+	unsigned short          version;
+	unsigned char           count;
+	unsigned char           reserved[11];
+	struct mspro_attr_entry entries[];
+} __attribute__((packed));
+
+struct mspro_sys_info {
+	unsigned char  class;
+	unsigned char  reserved0;
+	unsigned short block_size;
+	unsigned short block_count;
+	unsigned short user_block_count;
+	unsigned short page_size;
+	unsigned char  reserved1[2];
+	unsigned char  assembly_date[8];
+	unsigned int   serial_number;
+	unsigned char  assembly_maker_code;
+	unsigned char  assembly_model_code[3];
+	unsigned short memory_maker_code;
+	unsigned short memory_model_code;
+	unsigned char  reserved2[4];
+	unsigned char  vcc;
+	unsigned char  vpp;
+	unsigned short controller_number;
+	unsigned short controller_function;
+	unsigned short start_sector;
+	unsigned short unit_size;
+	unsigned char  ms_sub_class;
+	unsigned char  reserved3[4];
+	unsigned char  interface_type;
+	unsigned short controller_code;
+	unsigned char  format_type;
+	unsigned char  reserved4;
+	unsigned char  device_type;
+	unsigned char  reserved5[7];
+	unsigned char  mspro_id[16];
+	unsigned char  reserved6[16];
+} __attribute__((packed));
+
+struct mspro_mbr {
+	unsigned char boot_partition;
+	unsigned char start_head;
+	unsigned char start_sector;
+	unsigned char start_cylinder;
+	unsigned char partition_type;
+	unsigned char end_head;
+	unsigned char end_sector;
+	unsigned char end_cylinder;
+	unsigned int  start_sectors;
+	unsigned int  sectors_per_partition;
+} __attribute__((packed));
+
+struct mspro_devinfo {
+	unsigned short cylinders;
+	unsigned short heads;
+	unsigned short bytes_per_track;
+	unsigned short bytes_per_sector;
+	unsigned short sectors_per_track;
+	unsigned char  reserved[6];
+} __attribute__((packed));
+
+struct mspro_block_data {
+	struct memstick_dev   *card;
+	unsigned int          usage_count;
+	struct gendisk        *disk;
+	struct request_queue  *queue;
+	spinlock_t            q_lock;
+	wait_queue_head_t     q_wait;
+	struct task_struct    *q_thread;
+
+	unsigned short        page_size;
+	unsigned short        cylinders;
+	unsigned short        heads;
+	unsigned short        sectors_per_track;
+
+	unsigned char         system;
+	unsigned char         read_only:1,
+			      active:1,
+			      has_request:1,
+			      data_dir:1;
+	unsigned char         transfer_cmd;
+
+	int                   (*mrq_handler)(struct memstick_dev *card,
+					     struct memstick_request **mrq);
+
+	struct attribute_group attr_group;
+
+	struct scatterlist    req_sg[MSPRO_BLOCK_MAX_SEGS];
+	unsigned int          seg_count;
+	unsigned int          current_seg;
+	unsigned short        current_page;
+};
+
+static DEFINE_IDR(mspro_block_disk_idr);
+static DEFINE_MUTEX(mspro_block_disk_lock);
+
+/*** Block device ***/
+
+static int mspro_block_bd_open(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	struct mspro_block_data *msb = disk->private_data;
+	int rc = -ENXIO;
+
+	mutex_lock(&mspro_block_disk_lock);
+
+	if (msb && msb->card) {
+		msb->usage_count++;
+		if ((filp->f_mode & FMODE_WRITE) && msb->read_only)
+			rc = -EROFS;
+		else
+			rc = 0;
+	}
+
+	mutex_unlock(&mspro_block_disk_lock);
+
+	return rc;
+}
+
+
+static int mspro_block_disk_release(struct gendisk *disk)
+{
+	struct mspro_block_data *msb = disk->private_data;
+	int disk_id = disk->first_minor >> MEMSTICK_PART_SHIFT;
+
+	mutex_lock(&mspro_block_disk_lock);
+
+	if (msb->usage_count) {
+		msb->usage_count--;
+		if (!msb->usage_count) {
+			kfree(msb);
+			disk->private_data = NULL;
+			idr_remove(&mspro_block_disk_idr, disk_id);
+			put_disk(disk);
+		}
+	}
+
+	mutex_unlock(&mspro_block_disk_lock);
+
+	return 0;
+}
+
+static int mspro_block_bd_release(struct inode *inode, struct file *filp)
+{
+	struct gendisk *disk = inode->i_bdev->bd_disk;
+	return mspro_block_disk_release(disk);
+}
+
+static int mspro_block_bd_getgeo(struct block_device *bdev,
+				 struct hd_geometry *geo)
+{
+	struct mspro_block_data *msb = bdev->bd_disk->private_data;
+
+	geo->heads = msb->heads;
+	geo->sectors = msb->sectors_per_track;
+	geo->cylinders = msb->cylinders;
+
+	return 0;
+}
+
+static struct block_device_operations ms_block_bdops = {
+	.open    = mspro_block_bd_open,
+	.release = mspro_block_bd_release,
+	.getgeo  = mspro_block_bd_getgeo,
+	.owner   = THIS_MODULE
+};
+
+/*** Information ***/
+
+static struct mspro_sys_attr *mspro_from_sysfs_attr(struct attribute *attr)
+{
+	struct device_attribute *dev_attr
+		= container_of(attr, struct device_attribute, attr);
+	return container_of(dev_attr, struct mspro_sys_attr, dev_attr);
+}
+
+static const char *mspro_block_attr_name(unsigned char tag)
+{
+	switch (tag) {
+	case MSPRO_BLOCK_ID_SYSINFO:
+		return "attr_sysinfo";
+	case MSPRO_BLOCK_ID_MODELNAME:
+		return "attr_modelname";
+	case MSPRO_BLOCK_ID_MBR:
+		return "attr_mbr";
+	case MSPRO_BLOCK_ID_PBR16:
+		return "attr_pbr16";
+	case MSPRO_BLOCK_ID_PBR32:
+		return "attr_pbr32";
+	case MSPRO_BLOCK_ID_SPECFILEVALUES1:
+		return "attr_specfilevalues1";
+	case MSPRO_BLOCK_ID_SPECFILEVALUES2:
+		return "attr_specfilevalues2";
+	case MSPRO_BLOCK_ID_DEVINFO:
+		return "attr_devinfo";
+	default:
+		return NULL;
+	};
+}
+
+typedef ssize_t (*sysfs_show_t)(struct device *dev,
+				struct device_attribute *attr,
+				char *buffer);
+
+static ssize_t mspro_block_attr_show_default(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buffer)
+{
+	struct mspro_sys_attr *s_attr = container_of(attr,
+						     struct mspro_sys_attr,
+						     dev_attr);
+
+	ssize_t cnt, rc = 0;
+
+	for (cnt = 0; cnt < s_attr->size; cnt++) {
+		if (cnt && !(cnt % 16)) {
+			if (PAGE_SIZE - rc)
+				buffer[rc++] = '\n';
+		}
+
+		rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "%02x ",
+				((unsigned char *)s_attr->data)[cnt]);
+	}
+	return rc;
+}
+
+static ssize_t mspro_block_attr_show_sysinfo(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buffer)
+{
+	struct mspro_sys_attr *x_attr = container_of(attr,
+						     struct mspro_sys_attr,
+						     dev_attr);
+	struct mspro_sys_info *x_sys = x_attr->data;
+	ssize_t rc = 0;
+
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "class: %x\n",
+			x_sys->class);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "block size: %x\n",
+			be16_to_cpu(x_sys->block_size));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "block count: %x\n",
+			be16_to_cpu(x_sys->block_count));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "user block count: %x\n",
+			be16_to_cpu(x_sys->user_block_count));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "page size: %x\n",
+			be16_to_cpu(x_sys->page_size));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "assembly date: "
+			"%d %04u-%02u-%02u %02u:%02u:%02u\n",
+			x_sys->assembly_date[0],
+			be16_to_cpu(*(unsigned short *)
+				    &x_sys->assembly_date[1]),
+			x_sys->assembly_date[3], x_sys->assembly_date[4],
+			x_sys->assembly_date[5], x_sys->assembly_date[6],
+			x_sys->assembly_date[7]);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "serial number: %x\n",
+			be32_to_cpu(x_sys->serial_number));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc,
+			"assembly maker code: %x\n",
+			x_sys->assembly_maker_code);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "assembly model code: "
+			"%02x%02x%02x\n", x_sys->assembly_model_code[0],
+			x_sys->assembly_model_code[1],
+			x_sys->assembly_model_code[2]);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "memory maker code: %x\n",
+			be16_to_cpu(x_sys->memory_maker_code));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "memory model code: %x\n",
+			be16_to_cpu(x_sys->memory_model_code));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "vcc: %x\n",
+			x_sys->vcc);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "vpp: %x\n",
+			x_sys->vpp);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "controller number: %x\n",
+			be16_to_cpu(x_sys->controller_number));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc,
+			"controller function: %x\n",
+			be16_to_cpu(x_sys->controller_function));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "start sector: %x\n",
+			be16_to_cpu(x_sys->start_sector));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "unit size: %x\n",
+			be16_to_cpu(x_sys->unit_size));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "sub class: %x\n",
+			x_sys->ms_sub_class);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "interface type: %x\n",
+			x_sys->interface_type);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "controller code: %x\n",
+			be16_to_cpu(x_sys->controller_code));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "format type: %x\n",
+			x_sys->format_type);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "device type: %x\n",
+			x_sys->device_type);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "mspro id: %s\n",
+			x_sys->mspro_id);
+	return rc;
+}
+
+static ssize_t mspro_block_attr_show_modelname(struct device *dev,
+					       struct device_attribute *attr,
+					       char *buffer)
+{
+	struct mspro_sys_attr *s_attr = container_of(attr,
+						     struct mspro_sys_attr,
+						     dev_attr);
+
+	return scnprintf(buffer, PAGE_SIZE, "%s", (char *)s_attr->data);
+}
+
+static ssize_t mspro_block_attr_show_mbr(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buffer)
+{
+	struct mspro_sys_attr *x_attr = container_of(attr,
+						     struct mspro_sys_attr,
+						     dev_attr);
+	struct mspro_mbr *x_mbr = x_attr->data;
+	ssize_t rc = 0;
+
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "boot partition: %x\n",
+			x_mbr->boot_partition);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "start head: %x\n",
+			x_mbr->start_head);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "start sector: %x\n",
+			x_mbr->start_sector);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "start cylinder: %x\n",
+			x_mbr->start_cylinder);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "partition type: %x\n",
+			x_mbr->partition_type);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "end head: %x\n",
+			x_mbr->end_head);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "end sector: %x\n",
+			x_mbr->end_sector);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "end cylinder: %x\n",
+			x_mbr->end_cylinder);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "start sectors: %x\n",
+			x_mbr->start_sectors);
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc,
+			"sectors per partition: %x\n",
+			x_mbr->sectors_per_partition);
+	return rc;
+}
+
+static ssize_t mspro_block_attr_show_devinfo(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buffer)
+{
+	struct mspro_sys_attr *x_attr = container_of(attr,
+						     struct mspro_sys_attr,
+						     dev_attr);
+	struct mspro_devinfo *x_devinfo = x_attr->data;
+	ssize_t rc = 0;
+
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "cylinders: %x\n",
+			be16_to_cpu(x_devinfo->cylinders));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "heads: %x\n",
+			be16_to_cpu(x_devinfo->heads));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "bytes per track: %x\n",
+			be16_to_cpu(x_devinfo->bytes_per_track));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "bytes per sector: %x\n",
+			be16_to_cpu(x_devinfo->bytes_per_sector));
+	rc += scnprintf(buffer + rc, PAGE_SIZE - rc, "sectors per track: %x\n",
+			be16_to_cpu(x_devinfo->sectors_per_track));
+	return rc;
+}
+
+static sysfs_show_t mspro_block_attr_show(unsigned char tag)
+{
+	switch (tag) {
+	case MSPRO_BLOCK_ID_SYSINFO:
+		return mspro_block_attr_show_sysinfo;
+	case MSPRO_BLOCK_ID_MODELNAME:
+		return mspro_block_attr_show_modelname;
+	case MSPRO_BLOCK_ID_MBR:
+		return mspro_block_attr_show_mbr;
+	case MSPRO_BLOCK_ID_DEVINFO:
+		return mspro_block_attr_show_devinfo;
+	default:
+		return mspro_block_attr_show_default;
+	}
+}
+
+/*** Protocol handlers ***/
+
+/*
+ * Functions prefixed with "h_" are protocol callbacks. They can be called from
+ * interrupt context. Return value of 0 means that request processing is still
+ * ongoing, while special error value of -EAGAIN means that current request is
+ * finished (and request processor should come back some time later).
+ */
+
+static int h_mspro_block_req_init(struct memstick_dev *card,
+				  struct memstick_request **mrq)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+
+	*mrq = &card->current_mrq;
+	card->next_request = msb->mrq_handler;
+	return 0;
+}
+
+static int h_mspro_block_default(struct memstick_dev *card,
+				 struct memstick_request **mrq)
+{
+	complete(&card->mrq_complete);
+	if (!(*mrq)->error)
+		return -EAGAIN;
+	else
+		return (*mrq)->error;
+}
+
+static int h_mspro_block_get_ro(struct memstick_dev *card,
+				struct memstick_request **mrq)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+
+	if ((*mrq)->error) {
+		complete(&card->mrq_complete);
+		return (*mrq)->error;
+	}
+
+	if ((*mrq)->data[offsetof(struct ms_status_register, status0)]
+	    & MEMSTICK_STATUS0_WP)
+		msb->read_only = 1;
+	else
+		msb->read_only = 0;
+
+	complete(&card->mrq_complete);
+	return -EAGAIN;
+}
+
+static int h_mspro_block_wait_for_ced(struct memstick_dev *card,
+				      struct memstick_request **mrq)
+{
+	if ((*mrq)->error) {
+		complete(&card->mrq_complete);
+		return (*mrq)->error;
+	}
+
+	dev_dbg(&card->dev, "wait for ced: value %x\n", (*mrq)->data[0]);
+
+	if ((*mrq)->data[0] & (MEMSTICK_INT_CMDNAK | MEMSTICK_INT_ERR)) {
+		card->current_mrq.error = -EFAULT;
+		complete(&card->mrq_complete);
+		return card->current_mrq.error;
+	}
+
+	if (!((*mrq)->data[0] & MEMSTICK_INT_CED))
+		return 0;
+	else {
+		card->current_mrq.error = 0;
+		complete(&card->mrq_complete);
+		return -EAGAIN;
+	}
+}
+
+static int h_mspro_block_transfer_data(struct memstick_dev *card,
+				       struct memstick_request **mrq)
+{
+	struct memstick_host *host = card->host;
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	unsigned char t_val = 0;
+	struct scatterlist t_sg = { 0 };
+	size_t t_offset;
+
+	if ((*mrq)->error) {
+		complete(&card->mrq_complete);
+		return (*mrq)->error;
+	}
+
+	switch ((*mrq)->tpc) {
+	case MS_TPC_WRITE_REG:
+		memstick_init_req(*mrq, MS_TPC_SET_CMD, &msb->transfer_cmd, 1);
+		(*mrq)->get_int_reg = 1;
+		return 0;
+	case MS_TPC_SET_CMD:
+		t_val = (*mrq)->int_reg;
+		memstick_init_req(*mrq, MS_TPC_GET_INT, NULL, 1);
+		if (host->caps & MEMSTICK_CAP_AUTO_GET_INT)
+			goto has_int_reg;
+		return 0;
+	case MS_TPC_GET_INT:
+		t_val = (*mrq)->data[0];
+has_int_reg:
+		if (t_val & (MEMSTICK_INT_CMDNAK | MEMSTICK_INT_ERR)) {
+			t_val = MSPRO_CMD_STOP;
+			memstick_init_req(*mrq, MS_TPC_SET_CMD, &t_val, 1);
+			card->next_request = h_mspro_block_default;
+			return 0;
+		}
+
+		if (msb->current_page
+		    == (msb->req_sg[msb->current_seg].length
+			/ msb->page_size)) {
+			msb->current_page = 0;
+			msb->current_seg++;
+
+			if (msb->current_seg == msb->seg_count) {
+				if (t_val & MEMSTICK_INT_CED) {
+					complete(&card->mrq_complete);
+					return -EAGAIN;
+				} else {
+					card->next_request
+						= h_mspro_block_wait_for_ced;
+					memstick_init_req(*mrq, MS_TPC_GET_INT,
+							  NULL, 1);
+					return 0;
+				}
+			}
+		}
+
+		if (!(t_val & MEMSTICK_INT_BREQ)) {
+			memstick_init_req(*mrq, MS_TPC_GET_INT, NULL, 1);
+			return 0;
+		}
+
+		t_offset = msb->req_sg[msb->current_seg].offset;
+		t_offset += msb->current_page * msb->page_size;
+
+		sg_set_page(&t_sg,
+			    nth_page(sg_page(&(msb->req_sg[msb->current_seg])),
+				     t_offset >> PAGE_SHIFT),
+			    msb->page_size, offset_in_page(t_offset));
+
+		memstick_init_req_sg(*mrq, msb->data_dir == READ
+					   ? MS_TPC_READ_LONG_DATA
+					   : MS_TPC_WRITE_LONG_DATA,
+				     &t_sg);
+		(*mrq)->get_int_reg = 1;
+		return 0;
+	case MS_TPC_READ_LONG_DATA:
+	case MS_TPC_WRITE_LONG_DATA:
+		msb->current_page++;
+		if (host->caps & MEMSTICK_CAP_AUTO_GET_INT) {
+			t_val = (*mrq)->int_reg;
+			goto has_int_reg;
+		} else {
+			memstick_init_req(*mrq, MS_TPC_GET_INT, NULL, 1);
+			return 0;
+		}
+
+	default:
+		BUG();
+	}
+}
+
+/*** Data transfer ***/
+
+static void mspro_block_process_request(struct memstick_dev *card,
+					struct request *req)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct mspro_param_register param;
+	int rc, chunk, cnt;
+	unsigned short page_count;
+	sector_t t_sec;
+	unsigned long flags;
+
+	do {
+		page_count = 0;
+		msb->current_seg = 0;
+		msb->seg_count = blk_rq_map_sg(req->q, req, msb->req_sg);
+
+		if (msb->seg_count) {
+			msb->current_page = 0;
+			for (rc = 0; rc < msb->seg_count; rc++)
+				page_count += msb->req_sg[rc].length
+					      / msb->page_size;
+
+			t_sec = req->sector;
+			sector_div(t_sec, msb->page_size >> 9);
+			param.system = msb->system;
+			param.data_count = cpu_to_be16(page_count);
+			param.data_address = cpu_to_be32((uint32_t)t_sec);
+			param.cmd_param = 0;
+
+			msb->data_dir = rq_data_dir(req);
+			msb->transfer_cmd = msb->data_dir == READ
+					    ? MSPRO_CMD_READ_DATA
+					    : MSPRO_CMD_WRITE_DATA;
+
+			dev_dbg(&card->dev, "data transfer: cmd %x, "
+				"lba %x, count %x\n", msb->transfer_cmd,
+				be32_to_cpu(param.data_address),
+				page_count);
+
+			card->next_request = h_mspro_block_req_init;
+			msb->mrq_handler = h_mspro_block_transfer_data;
+			memstick_init_req(&card->current_mrq, MS_TPC_WRITE_REG,
+					  &param, sizeof(param));
+			memstick_new_req(card->host);
+			wait_for_completion(&card->mrq_complete);
+			rc = card->current_mrq.error;
+
+			if (rc || (card->current_mrq.tpc == MSPRO_CMD_STOP)) {
+				for (cnt = 0; cnt < msb->current_seg; cnt++)
+					page_count += msb->req_sg[cnt].length
+						      / msb->page_size;
+
+				if (msb->current_page)
+					page_count += msb->current_page - 1;
+
+				if (page_count && (msb->data_dir == READ))
+					rc = msb->page_size * page_count;
+				else
+					rc = -EIO;
+			} else
+				rc = msb->page_size * page_count;
+		} else
+			rc = -EFAULT;
+
+		spin_lock_irqsave(&msb->q_lock, flags);
+		if (rc >= 0)
+			chunk = __blk_end_request(req, 0, rc);
+		else
+			chunk = __blk_end_request(req, rc, 0);
+
+		dev_dbg(&card->dev, "end chunk %d, %d\n", rc, chunk);
+		spin_unlock_irqrestore(&msb->q_lock, flags);
+	} while (chunk);
+}
+
+static int mspro_block_has_request(struct mspro_block_data *msb)
+{
+	int rc = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&msb->q_lock, flags);
+	if (kthread_should_stop() || msb->has_request)
+		rc = 1;
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+	return rc;
+}
+
+static int mspro_block_queue_thread(void *data)
+{
+	struct memstick_dev *card = data;
+	struct memstick_host *host = card->host;
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct request *req;
+	unsigned long flags;
+
+	while (1) {
+		wait_event(msb->q_wait, mspro_block_has_request(msb));
+		dev_dbg(&card->dev, "thread iter\n");
+
+		spin_lock_irqsave(&msb->q_lock, flags);
+		req = elv_next_request(msb->queue);
+		dev_dbg(&card->dev, "next req %p\n", req);
+		if (!req) {
+			msb->has_request = 0;
+			if (kthread_should_stop()) {
+				spin_unlock_irqrestore(&msb->q_lock, flags);
+				break;
+			}
+		} else
+			msb->has_request = 1;
+		spin_unlock_irqrestore(&msb->q_lock, flags);
+
+		if (req) {
+			mutex_lock(&host->lock);
+			mspro_block_process_request(card, req);
+			mutex_unlock(&host->lock);
+		}
+	}
+	dev_dbg(&card->dev, "thread finished\n");
+	return 0;
+}
+
+static void mspro_block_request(struct request_queue *q)
+{
+	struct memstick_dev *card = q->queuedata;
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct request *req = NULL;
+
+	if (msb->q_thread) {
+		msb->has_request = 1;
+		wake_up_all(&msb->q_wait);
+	} else {
+		while ((req = elv_next_request(q)) != NULL)
+			end_queued_request(req, -ENODEV);
+	}
+}
+
+/*** Initialization ***/
+
+static int mspro_block_wait_for_ced(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+
+	card->next_request = h_mspro_block_req_init;
+	msb->mrq_handler = h_mspro_block_wait_for_ced;
+	memstick_init_req(&card->current_mrq, MS_TPC_GET_INT, NULL, 1);
+	memstick_new_req(card->host);
+	wait_for_completion(&card->mrq_complete);
+	return card->current_mrq.error;
+}
+
+static int mspro_block_switch_to_parallel(struct memstick_dev *card)
+{
+	struct memstick_host *host = card->host;
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct mspro_param_register param = {
+		.system = 0,
+		.data_count = 0,
+		.data_address = 0,
+		.cmd_param = 0
+	};
+
+	card->next_request = h_mspro_block_req_init;
+	msb->mrq_handler = h_mspro_block_default;
+	memstick_init_req(&card->current_mrq, MS_TPC_WRITE_REG, &param,
+			  sizeof(param));
+	memstick_new_req(host);
+	wait_for_completion(&card->mrq_complete);
+	if (card->current_mrq.error)
+		return card->current_mrq.error;
+
+	msb->system = 0;
+	host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PARALLEL);
+
+	card->next_request = h_mspro_block_req_init;
+	msb->mrq_handler = h_mspro_block_default;
+	memstick_init_req(&card->current_mrq, MS_TPC_GET_INT, NULL, 1);
+	memstick_new_req(card->host);
+	wait_for_completion(&card->mrq_complete);
+
+	if (card->current_mrq.error) {
+		msb->system = 0x80;
+		host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_SERIAL);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/* Memory allocated for attributes by this function should be freed by
+ * mspro_block_data_clear, no matter if the initialization process succeded
+ * or failed.
+ */
+static int mspro_block_read_attributes(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct mspro_param_register param = {
+		.system = msb->system,
+		.data_count = cpu_to_be16(1),
+		.data_address = 0,
+		.cmd_param = 0
+	};
+	struct mspro_attribute *attr = NULL;
+	struct mspro_sys_attr *s_attr = NULL;
+	unsigned char *buffer = NULL;
+	int cnt, rc, attr_count;
+	unsigned int addr;
+	unsigned short page_count;
+
+	attr = kmalloc(msb->page_size, GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	sg_init_one(&msb->req_sg[0], attr, msb->page_size);
+	msb->seg_count = 1;
+	msb->current_seg = 0;
+	msb->current_page = 0;
+	msb->data_dir = READ;
+	msb->transfer_cmd = MSPRO_CMD_READ_ATRB;
+
+	card->next_request = h_mspro_block_req_init;
+	msb->mrq_handler = h_mspro_block_transfer_data;
+	memstick_init_req(&card->current_mrq, MS_TPC_WRITE_REG, &param,
+			  sizeof(param));
+	memstick_new_req(card->host);
+	wait_for_completion(&card->mrq_complete);
+	if (card->current_mrq.error) {
+		rc = card->current_mrq.error;
+		goto out_free_attr;
+	}
+
+	if (be16_to_cpu(attr->signature) != MSPRO_BLOCK_SIGNATURE) {
+		printk(KERN_ERR "%s: unrecognized device signature %x\n",
+		       card->dev.bus_id, be16_to_cpu(attr->signature));
+		rc = -ENODEV;
+		goto out_free_attr;
+	}
+
+	if (attr->count > MSPRO_BLOCK_MAX_ATTRIBUTES) {
+		printk(KERN_WARNING "%s: way too many attribute entries\n",
+		       card->dev.bus_id);
+		attr_count = MSPRO_BLOCK_MAX_ATTRIBUTES;
+	} else
+		attr_count = attr->count;
+
+	msb->attr_group.attrs = kzalloc((attr_count + 1)
+					* sizeof(struct attribute),
+					GFP_KERNEL);
+	if (!msb->attr_group.attrs) {
+		rc = -ENOMEM;
+		goto out_free_attr;
+	}
+	msb->attr_group.name = "media_attributes";
+
+	buffer = kmalloc(msb->page_size, GFP_KERNEL);
+	if (!buffer) {
+		rc = -ENOMEM;
+		goto out_free_attr;
+	}
+	memcpy(buffer, (char *)attr, msb->page_size);
+	page_count = 1;
+
+	for (cnt = 0; cnt < attr_count; ++cnt) {
+		s_attr = kzalloc(sizeof(struct mspro_sys_attr), GFP_KERNEL);
+		if (!s_attr) {
+			rc = -ENOMEM;
+			goto out_free_buffer;
+		}
+
+		msb->attr_group.attrs[cnt] = &s_attr->dev_attr.attr;
+		addr = be32_to_cpu(attr->entries[cnt].address);
+		rc = be32_to_cpu(attr->entries[cnt].size);
+		dev_dbg(&card->dev, "adding attribute %d: id %x, address %x, "
+			"size %x\n", cnt, attr->entries[cnt].id, addr, rc);
+		s_attr->id = attr->entries[cnt].id;
+		if (mspro_block_attr_name(s_attr->id))
+			snprintf(s_attr->name, sizeof(s_attr->name), "%s",
+				 mspro_block_attr_name(attr->entries[cnt].id));
+		else
+			snprintf(s_attr->name, sizeof(s_attr->name),
+				 "attr_x%02x", attr->entries[cnt].id);
+
+		s_attr->dev_attr.attr.name = s_attr->name;
+		s_attr->dev_attr.attr.mode = S_IRUGO;
+		s_attr->dev_attr.attr.owner = THIS_MODULE;
+		s_attr->dev_attr.show = mspro_block_attr_show(s_attr->id);
+
+		if (!rc)
+			continue;
+
+		s_attr->size = rc;
+		s_attr->data = kmalloc(rc, GFP_KERNEL);
+		if (!s_attr->data) {
+			rc = -ENOMEM;
+			goto out_free_buffer;
+		}
+
+		if (((addr / msb->page_size)
+		     == be32_to_cpu(param.data_address))
+		    && (((addr + rc - 1) / msb->page_size)
+			== be32_to_cpu(param.data_address))) {
+			memcpy(s_attr->data, buffer + addr % msb->page_size,
+			       rc);
+			continue;
+		}
+
+		if (page_count <= (rc / msb->page_size)) {
+			kfree(buffer);
+			page_count = (rc / msb->page_size) + 1;
+			buffer = kmalloc(page_count * msb->page_size,
+					 GFP_KERNEL);
+			if (!buffer) {
+				rc = -ENOMEM;
+				goto out_free_attr;
+			}
+		}
+
+		param.system = msb->system;
+		param.data_count = cpu_to_be16((rc / msb->page_size) + 1);
+		param.data_address = cpu_to_be32(addr / msb->page_size);
+		param.cmd_param = 0;
+
+		sg_init_one(&msb->req_sg[0], buffer,
+			    be16_to_cpu(param.data_count) * msb->page_size);
+		msb->seg_count = 1;
+		msb->current_seg = 0;
+		msb->current_page = 0;
+		msb->data_dir = READ;
+		msb->transfer_cmd = MSPRO_CMD_READ_ATRB;
+
+		dev_dbg(&card->dev, "reading attribute pages %x, %x\n",
+			be32_to_cpu(param.data_address),
+			be16_to_cpu(param.data_count));
+
+		card->next_request = h_mspro_block_req_init;
+		msb->mrq_handler = h_mspro_block_transfer_data;
+		memstick_init_req(&card->current_mrq, MS_TPC_WRITE_REG,
+				  (char *)&param, sizeof(param));
+		memstick_new_req(card->host);
+		wait_for_completion(&card->mrq_complete);
+		if (card->current_mrq.error) {
+			rc = card->current_mrq.error;
+			goto out_free_buffer;
+		}
+
+		memcpy(s_attr->data, buffer + addr % msb->page_size, rc);
+	}
+
+	rc = 0;
+out_free_buffer:
+	kfree(buffer);
+out_free_attr:
+	kfree(attr);
+	return rc;
+}
+
+static int mspro_block_init_card(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct memstick_host *host = card->host;
+	int rc = 0;
+
+	msb->system = 0x80;
+	card->reg_addr.r_offset = offsetof(struct mspro_register, status);
+	card->reg_addr.r_length = sizeof(struct ms_status_register);
+	card->reg_addr.w_offset = offsetof(struct mspro_register, param);
+	card->reg_addr.w_length = sizeof(struct mspro_param_register);
+
+	if (memstick_set_rw_addr(card))
+		return -EIO;
+
+	if (host->caps & MEMSTICK_CAP_PARALLEL) {
+		if (mspro_block_switch_to_parallel(card))
+			printk(KERN_WARNING "%s: could not switch to "
+			       "parallel interface\n", card->dev.bus_id);
+	}
+
+	rc = mspro_block_wait_for_ced(card);
+	if (rc)
+		return rc;
+	dev_dbg(&card->dev, "card activated\n");
+
+	card->next_request = h_mspro_block_req_init;
+	msb->mrq_handler = h_mspro_block_get_ro;
+	memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, NULL,
+			  sizeof(struct ms_status_register));
+	memstick_new_req(card->host);
+	wait_for_completion(&card->mrq_complete);
+	if (card->current_mrq.error)
+		return card->current_mrq.error;
+
+	dev_dbg(&card->dev, "card r/w status %d\n", msb->read_only ? 0 : 1);
+
+	msb->page_size = 512;
+	rc = mspro_block_read_attributes(card);
+	if (rc)
+		return rc;
+
+	dev_dbg(&card->dev, "attributes loaded\n");
+	return 0;
+
+}
+
+static int mspro_block_init_disk(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct memstick_host *host = card->host;
+	struct mspro_devinfo *dev_info = NULL;
+	struct mspro_sys_info *sys_info = NULL;
+	struct mspro_sys_attr *s_attr = NULL;
+	int rc, disk_id;
+	u64 limit = BLK_BOUNCE_HIGH;
+	unsigned long capacity;
+
+	if (host->cdev.dev->dma_mask && *(host->cdev.dev->dma_mask))
+		limit = *(host->cdev.dev->dma_mask);
+
+	for (rc = 0; msb->attr_group.attrs[rc]; ++rc) {
+		s_attr = mspro_from_sysfs_attr(msb->attr_group.attrs[rc]);
+
+		if (s_attr->id == MSPRO_BLOCK_ID_DEVINFO)
+			dev_info = s_attr->data;
+		else if (s_attr->id == MSPRO_BLOCK_ID_SYSINFO)
+			sys_info = s_attr->data;
+	}
+
+	if (!dev_info || !sys_info)
+		return -ENODEV;
+
+	msb->cylinders = be16_to_cpu(dev_info->cylinders);
+	msb->heads = be16_to_cpu(dev_info->heads);
+	msb->sectors_per_track = be16_to_cpu(dev_info->sectors_per_track);
+
+	msb->page_size = be16_to_cpu(sys_info->unit_size);
+
+	if (!idr_pre_get(&mspro_block_disk_idr, GFP_KERNEL))
+		return -ENOMEM;
+
+	mutex_lock(&mspro_block_disk_lock);
+	rc = idr_get_new(&mspro_block_disk_idr, card, &disk_id);
+	mutex_unlock(&mspro_block_disk_lock);
+
+	if (rc)
+		return rc;
+
+	if ((disk_id << MEMSTICK_PART_SHIFT) > 255) {
+		rc = -ENOSPC;
+		goto out_release_id;
+	}
+
+	msb->disk = alloc_disk(1 << MEMSTICK_PART_SHIFT);
+	if (!msb->disk) {
+		rc = -ENOMEM;
+		goto out_release_id;
+	}
+
+	spin_lock_init(&msb->q_lock);
+	init_waitqueue_head(&msb->q_wait);
+
+	msb->queue = blk_init_queue(mspro_block_request, &msb->q_lock);
+	if (!msb->queue) {
+		rc = -ENOMEM;
+		goto out_put_disk;
+	}
+
+	msb->queue->queuedata = card;
+
+	blk_queue_bounce_limit(msb->queue, limit);
+	blk_queue_max_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
+	blk_queue_max_phys_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
+	blk_queue_max_hw_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
+	blk_queue_max_segment_size(msb->queue,
+				   MSPRO_BLOCK_MAX_PAGES * msb->page_size);
+
+	msb->disk->major = major;
+	msb->disk->first_minor = disk_id << MEMSTICK_PART_SHIFT;
+	msb->disk->fops = &ms_block_bdops;
+	msb->usage_count = 1;
+	msb->disk->private_data = msb;
+	msb->disk->queue = msb->queue;
+	msb->disk->driverfs_dev = &card->dev;
+
+	sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
+
+	blk_queue_hardsect_size(msb->queue, msb->page_size);
+
+	capacity = be16_to_cpu(sys_info->user_block_count);
+	capacity *= be16_to_cpu(sys_info->block_size);
+	capacity *= msb->page_size >> 9;
+	set_capacity(msb->disk, capacity);
+	dev_dbg(&card->dev, "capacity set %ld\n", capacity);
+	msb->q_thread = kthread_run(mspro_block_queue_thread, card,
+				    DRIVER_NAME"d");
+	if (IS_ERR(msb->q_thread))
+		goto out_put_disk;
+
+	mutex_unlock(&host->lock);
+	add_disk(msb->disk);
+	mutex_lock(&host->lock);
+	msb->active = 1;
+	return 0;
+
+out_put_disk:
+	put_disk(msb->disk);
+out_release_id:
+	mutex_lock(&mspro_block_disk_lock);
+	idr_remove(&mspro_block_disk_idr, disk_id);
+	mutex_unlock(&mspro_block_disk_lock);
+	return rc;
+}
+
+static void mspro_block_data_clear(struct mspro_block_data *msb)
+{
+	int cnt;
+	struct mspro_sys_attr *s_attr;
+
+	if (msb->attr_group.attrs) {
+		for (cnt = 0; msb->attr_group.attrs[cnt]; ++cnt) {
+			s_attr = mspro_from_sysfs_attr(msb->attr_group
+							   .attrs[cnt]);
+			kfree(s_attr->data);
+			kfree(s_attr);
+		}
+		kfree(msb->attr_group.attrs);
+	}
+
+	msb->card = NULL;
+}
+
+static int mspro_block_check_card(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+
+	return (msb->active == 1);
+}
+
+static int mspro_block_probe(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb;
+	int rc = 0;
+
+	msb = kzalloc(sizeof(struct mspro_block_data), GFP_KERNEL);
+	if (!msb)
+		return -ENOMEM;
+	memstick_set_drvdata(card, msb);
+	msb->card = card;
+
+	rc = mspro_block_init_card(card);
+
+	if (rc)
+		goto out_free;
+
+	rc = sysfs_create_group(&card->dev.kobj, &msb->attr_group);
+	if (rc)
+		goto out_free;
+
+	rc = mspro_block_init_disk(card);
+	if (!rc) {
+		card->check = mspro_block_check_card;
+		return 0;
+	}
+
+	sysfs_remove_group(&card->dev.kobj, &msb->attr_group);
+out_free:
+	memstick_set_drvdata(card, NULL);
+	mspro_block_data_clear(msb);
+	kfree(msb);
+	return rc;
+}
+
+static void mspro_block_remove(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct task_struct *q_thread = NULL;
+	unsigned long flags;
+
+	del_gendisk(msb->disk);
+	dev_dbg(&card->dev, "mspro block remove\n");
+	spin_lock_irqsave(&msb->q_lock, flags);
+	q_thread = msb->q_thread;
+	msb->q_thread = NULL;
+	msb->active = 0;
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+
+	if (q_thread) {
+		mutex_unlock(&card->host->lock);
+		kthread_stop(q_thread);
+		mutex_lock(&card->host->lock);
+	}
+
+	dev_dbg(&card->dev, "queue thread stopped\n");
+
+	blk_cleanup_queue(msb->queue);
+
+	sysfs_remove_group(&card->dev.kobj, &msb->attr_group);
+
+	mutex_lock(&mspro_block_disk_lock);
+	mspro_block_data_clear(msb);
+	mutex_unlock(&mspro_block_disk_lock);
+
+	mspro_block_disk_release(msb->disk);
+	memstick_set_drvdata(card, NULL);
+}
+
+#ifdef CONFIG_PM
+
+static int mspro_block_suspend(struct memstick_dev *card, pm_message_t state)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	struct task_struct *q_thread = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&msb->q_lock, flags);
+	q_thread = msb->q_thread;
+	msb->q_thread = NULL;
+	msb->active = 0;
+	blk_stop_queue(msb->queue);
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+
+	if (q_thread)
+		kthread_stop(q_thread);
+
+	return 0;
+}
+
+static int mspro_block_resume(struct memstick_dev *card)
+{
+	struct mspro_block_data *msb = memstick_get_drvdata(card);
+	unsigned long flags;
+	int rc = 0;
+
+#ifdef CONFIG_MEMSTICK_UNSAFE_RESUME
+
+	struct mspro_block_data *new_msb;
+	struct memstick_host *host = card->host;
+	struct mspro_sys_attr *s_attr, *r_attr;
+	unsigned char cnt;
+
+	mutex_lock(&host->lock);
+	new_msb = kzalloc(sizeof(struct mspro_block_data), GFP_KERNEL);
+	if (!new_msb) {
+		rc = -ENOMEM;
+		goto out_unlock;
+	}
+
+	new_msb->card = card;
+	memstick_set_drvdata(card, new_msb);
+	if (mspro_block_init_card(card))
+		goto out_free;
+
+	for (cnt = 0; new_msb->attr_group.attrs[cnt]
+		      && msb->attr_group.attrs[cnt]; ++cnt) {
+		s_attr = mspro_from_sysfs_attr(new_msb->attr_group.attrs[cnt]);
+		r_attr = mspro_from_sysfs_attr(msb->attr_group.attrs[cnt]);
+
+		if (s_attr->id == MSPRO_BLOCK_ID_SYSINFO
+		    && r_attr->id == s_attr->id) {
+			if (memcmp(s_attr->data, r_attr->data, s_attr->size))
+				break;
+
+			memstick_set_drvdata(card, msb);
+			msb->q_thread = kthread_run(mspro_block_queue_thread,
+						    card, DRIVER_NAME"d");
+			if (IS_ERR(msb->q_thread))
+				msb->q_thread = NULL;
+			else
+				msb->active = 1;
+
+			break;
+		}
+	}
+
+out_free:
+	memstick_set_drvdata(card, msb);
+	mspro_block_data_clear(new_msb);
+	kfree(new_msb);
+out_unlock:
+	mutex_unlock(&host->lock);
+
+#endif /* CONFIG_MEMSTICK_UNSAFE_RESUME */
+
+	spin_lock_irqsave(&msb->q_lock, flags);
+	blk_start_queue(msb->queue);
+	spin_unlock_irqrestore(&msb->q_lock, flags);
+	return rc;
+}
+
+#else
+
+#define mspro_block_suspend NULL
+#define mspro_block_resume NULL
+
+#endif /* CONFIG_PM */
+
+static struct memstick_device_id mspro_block_id_tbl[] = {
+	{MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_PRO, MEMSTICK_CATEGORY_STORAGE_DUO,
+	 MEMSTICK_CLASS_GENERIC_DUO},
+	{}
+};
+
+
+static struct memstick_driver mspro_block_driver = {
+	.driver = {
+		.name  = DRIVER_NAME,
+		.owner = THIS_MODULE
+	},
+	.id_table = mspro_block_id_tbl,
+	.probe    = mspro_block_probe,
+	.remove   = mspro_block_remove,
+	.suspend  = mspro_block_suspend,
+	.resume   = mspro_block_resume
+};
+
+static int __init mspro_block_init(void)
+{
+	int rc = -ENOMEM;
+
+	rc = register_blkdev(major, DRIVER_NAME);
+	if (rc < 0) {
+		printk(KERN_ERR DRIVER_NAME ": failed to register "
+		       "major %d, error %d\n", major, rc);
+		return rc;
+	}
+	if (!major)
+		major = rc;
+
+	rc = memstick_register_driver(&mspro_block_driver);
+	if (rc)
+		unregister_blkdev(major, DRIVER_NAME);
+	return rc;
+}
+
+static void __exit mspro_block_exit(void)
+{
+	memstick_unregister_driver(&mspro_block_driver);
+	unregister_blkdev(major, DRIVER_NAME);
+	idr_destroy(&mspro_block_disk_idr);
+}
+
+module_init(mspro_block_init);
+module_exit(mspro_block_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("Sony MemoryStickPro block device driver");
+MODULE_DEVICE_TABLE(memstick, mspro_block_id_tbl);
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/memstick/host/Kconfig b/drivers/memstick/host/Kconfig
new file mode 100644
index 000000000000..c002fcc3c879
--- /dev/null
+++ b/drivers/memstick/host/Kconfig
@@ -0,0 +1,22 @@
+#
+# MemoryStick host controller drivers
+#
+
+comment "MemoryStick Host Controller Drivers"
+
+config MEMSTICK_TIFM_MS
+	tristate "TI Flash Media MemoryStick Interface support  (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && PCI
+	select TIFM_CORE
+	help
+	  Say Y here if you want to be able to access MemoryStick cards with
+	  the Texas Instruments(R) Flash Media card reader, found in many
+	  laptops.
+	  This option 'selects' (turns on, enables) 'TIFM_CORE', but you
+	  probably also need appropriate card reader host adapter, such as
+	  'Misc devices: TI Flash Media PCI74xx/PCI76xx host adapter support
+	  (TIFM_7XX1)'.
+
+          To compile this driver as a module, choose M here: the
+	  module will be called tifm_ms.
+
diff --git a/drivers/memstick/host/Makefile b/drivers/memstick/host/Makefile
new file mode 100644
index 000000000000..ee666380efa1
--- /dev/null
+++ b/drivers/memstick/host/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for MemoryStick host controller drivers
+#
+
+ifeq ($(CONFIG_MEMSTICK_DEBUG),y)
+	EXTRA_CFLAGS		+= -DDEBUG
+endif
+
+obj-$(CONFIG_MEMSTICK_TIFM_MS)	+= tifm_ms.o
+
diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c
new file mode 100644
index 000000000000..f55b71a4337d
--- /dev/null
+++ b/drivers/memstick/host/tifm_ms.c
@@ -0,0 +1,685 @@
+/*
+ *  TI FlashMedia driver
+ *
+ *  Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Special thanks to Carlos Corbacho for providing various MemoryStick cards
+ * that made this driver possible.
+ *
+ */
+
+#include <linux/tifm.h>
+#include <linux/memstick.h>
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+#include <linux/log2.h>
+#include <asm/io.h>
+
+#define DRIVER_NAME "tifm_ms"
+#define DRIVER_VERSION "0.1"
+
+static int no_dma;
+module_param(no_dma, bool, 0644);
+
+#define TIFM_MS_TIMEOUT      0x00100
+#define TIFM_MS_BADCRC       0x00200
+#define TIFM_MS_EOTPC        0x01000
+#define TIFM_MS_INT          0x02000
+
+/* The meaning of the bit majority in this constant is unknown. */
+#define TIFM_MS_SERIAL       0x04010
+
+#define TIFM_MS_SYS_LATCH    0x00100
+#define TIFM_MS_SYS_NOT_RDY  0x00800
+#define TIFM_MS_SYS_DATA     0x10000
+
+/* Hardware flags */
+enum {
+	CMD_READY  = 0x0001,
+	FIFO_READY = 0x0002,
+	CARD_READY = 0x0004,
+	DATA_CARRY = 0x0008
+};
+
+struct tifm_ms {
+	struct tifm_dev         *dev;
+	unsigned short          eject:1,
+				no_dma:1;
+	unsigned short          cmd_flags;
+	unsigned int            mode_mask;
+	unsigned int            block_pos;
+	unsigned long           timeout_jiffies;
+
+	struct timer_list       timer;
+	struct memstick_request *req;
+	unsigned int            io_word;
+};
+
+static void tifm_ms_read_fifo(struct tifm_ms *host, unsigned int fifo_offset,
+			      struct page *pg, unsigned int page_off,
+			      unsigned int length)
+{
+	struct tifm_dev *sock = host->dev;
+	unsigned int cnt = 0, off = 0;
+	unsigned char *buf = kmap_atomic(pg, KM_BIO_DST_IRQ) + page_off;
+
+	if (host->cmd_flags & DATA_CARRY) {
+		while ((fifo_offset & 3) && length) {
+			buf[off++] = host->io_word & 0xff;
+			host->io_word >>= 8;
+			length--;
+			fifo_offset++;
+		}
+		if (!(fifo_offset & 3))
+			host->cmd_flags &= ~DATA_CARRY;
+		if (!length)
+			return;
+	}
+
+	do {
+		host->io_word = readl(sock->addr + SOCK_FIFO_ACCESS
+				      + fifo_offset);
+		cnt = 4;
+		while (length && cnt) {
+			buf[off++] = (host->io_word >> 8) & 0xff;
+			cnt--;
+			length--;
+		}
+		fifo_offset += 4 - cnt;
+	} while (length);
+
+	if (cnt)
+		host->cmd_flags |= DATA_CARRY;
+
+	kunmap_atomic(buf - page_off, KM_BIO_DST_IRQ);
+}
+
+static void tifm_ms_write_fifo(struct tifm_ms *host, unsigned int fifo_offset,
+			       struct page *pg, unsigned int page_off,
+			       unsigned int length)
+{
+	struct tifm_dev *sock = host->dev;
+	unsigned int cnt = 0, off = 0;
+	unsigned char *buf = kmap_atomic(pg, KM_BIO_SRC_IRQ) + page_off;
+
+	if (host->cmd_flags & DATA_CARRY) {
+		while (fifo_offset & 3) {
+			host->io_word |= buf[off++] << (8 * (fifo_offset & 3));
+			length--;
+			fifo_offset++;
+		}
+		if (!(fifo_offset & 3)) {
+			writel(host->io_word, sock->addr + SOCK_FIFO_ACCESS
+			       + fifo_offset - 4);
+
+			host->cmd_flags &= ~DATA_CARRY;
+		}
+		if (!length)
+			return;
+	}
+
+	do {
+		cnt = 4;
+		host->io_word = 0;
+		while (length && cnt) {
+			host->io_word |= buf[off++] << (4 - cnt);
+			cnt--;
+			length--;
+		}
+		fifo_offset += 4 - cnt;
+		if (!cnt)
+			writel(host->io_word, sock->addr + SOCK_FIFO_ACCESS
+					      + fifo_offset - 4);
+
+	} while (length);
+
+	if (cnt)
+		host->cmd_flags |= DATA_CARRY;
+
+	kunmap_atomic(buf - page_off, KM_BIO_SRC_IRQ);
+}
+
+static void tifm_ms_move_block(struct tifm_ms *host, unsigned int length)
+{
+	unsigned int t_size;
+	unsigned int off = host->req->sg.offset + host->block_pos;
+	unsigned int p_off, p_cnt;
+	struct page *pg;
+	unsigned long flags;
+
+	dev_dbg(&host->dev->dev, "moving block\n");
+	local_irq_save(flags);
+	t_size = length;
+	while (t_size) {
+		pg = nth_page(sg_page(&host->req->sg), off >> PAGE_SHIFT);
+		p_off = offset_in_page(off);
+		p_cnt = PAGE_SIZE - p_off;
+		p_cnt = min(p_cnt, t_size);
+
+		if (host->req->data_dir == WRITE)
+			tifm_ms_write_fifo(host, length - t_size,
+					   pg, p_off, p_cnt);
+		else
+			tifm_ms_read_fifo(host, length - t_size,
+					  pg, p_off, p_cnt);
+
+		t_size -= p_cnt;
+	}
+	local_irq_restore(flags);
+}
+
+static int tifm_ms_transfer_data(struct tifm_ms *host, int skip)
+{
+	struct tifm_dev *sock = host->dev;
+	unsigned int length = host->req->sg.length - host->block_pos;
+
+	if (!length)
+		return 1;
+
+	if (length > TIFM_FIFO_SIZE)
+		length = TIFM_FIFO_SIZE;
+
+	if (!skip) {
+		tifm_ms_move_block(host, length);
+		host->block_pos += length;
+	}
+
+	if ((host->req->data_dir == READ)
+	    && (host->block_pos == host->req->sg.length))
+		return 1;
+
+	writel(ilog2(length) - 2, sock->addr + SOCK_FIFO_PAGE_SIZE);
+	if (host->req->data_dir == WRITE)
+		writel((1 << 8) | TIFM_DMA_TX, sock->addr + SOCK_DMA_CONTROL);
+	else
+		writel((1 << 8), sock->addr + SOCK_DMA_CONTROL);
+
+	return 0;
+}
+
+static int tifm_ms_issue_cmd(struct tifm_ms *host)
+{
+	struct tifm_dev *sock = host->dev;
+	unsigned char *data;
+	unsigned int data_len = 0, cmd = 0, cmd_mask = 0, cnt, tval = 0;
+
+	host->cmd_flags = 0;
+
+	if (host->req->io_type == MEMSTICK_IO_SG) {
+		if (!host->no_dma) {
+			if (1 != tifm_map_sg(sock, &host->req->sg, 1,
+					     host->req->data_dir == READ
+					     ? PCI_DMA_FROMDEVICE
+					     : PCI_DMA_TODEVICE)) {
+				host->req->error = -ENOMEM;
+				return host->req->error;
+			}
+			data_len = sg_dma_len(&host->req->sg);
+		} else
+			data_len = host->req->sg.length;
+
+		writel(TIFM_FIFO_INT_SETALL,
+		       sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR);
+		writel(TIFM_FIFO_ENABLE,
+		       sock->addr + SOCK_FIFO_CONTROL);
+		writel(TIFM_FIFO_INTMASK,
+		       sock->addr + SOCK_DMA_FIFO_INT_ENABLE_SET);
+
+		if (!host->no_dma) {
+			writel(ilog2(data_len) - 2,
+			       sock->addr + SOCK_FIFO_PAGE_SIZE);
+			writel(sg_dma_address(&host->req->sg),
+			       sock->addr + SOCK_DMA_ADDRESS);
+			if (host->req->data_dir == WRITE)
+				writel((1 << 8) | TIFM_DMA_TX | TIFM_DMA_EN,
+				       sock->addr + SOCK_DMA_CONTROL);
+			else
+				writel((1 << 8) | TIFM_DMA_EN,
+				       sock->addr + SOCK_DMA_CONTROL);
+		} else {
+			tifm_ms_transfer_data(host,
+					      host->req->data_dir == READ);
+		}
+
+		cmd_mask = readl(sock->addr + SOCK_MS_SYSTEM);
+		cmd_mask |= TIFM_MS_SYS_DATA | TIFM_MS_SYS_NOT_RDY;
+		writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM);
+	} else if (host->req->io_type == MEMSTICK_IO_VAL) {
+		data = host->req->data;
+		data_len = host->req->data_len;
+
+		cmd_mask = host->mode_mask | 0x2607; /* unknown constant */
+
+		if (host->req->data_dir == WRITE) {
+			cmd_mask |= TIFM_MS_SYS_LATCH;
+			writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM);
+			for (cnt = 0; (data_len - cnt) >= 4; cnt += 4) {
+				writel(TIFM_MS_SYS_LATCH
+				       | readl(sock->addr + SOCK_MS_SYSTEM),
+				       sock->addr + SOCK_MS_SYSTEM);
+				__raw_writel(*(unsigned int *)(data + cnt),
+					     sock->addr + SOCK_MS_DATA);
+				dev_dbg(&sock->dev, "writing %x\n",
+					*(int *)(data + cnt));
+			}
+			switch (data_len - cnt) {
+			case 3:
+				tval |= data[cnt + 2] << 16;
+			case 2:
+				tval |= data[cnt + 1] << 8;
+			case 1:
+				tval |= data[cnt];
+				writel(TIFM_MS_SYS_LATCH
+				       | readl(sock->addr + SOCK_MS_SYSTEM),
+				       sock->addr + SOCK_MS_SYSTEM);
+				writel(tval, sock->addr + SOCK_MS_DATA);
+				dev_dbg(&sock->dev, "writing %x\n", tval);
+			}
+
+			writel(TIFM_MS_SYS_LATCH
+			       | readl(sock->addr + SOCK_MS_SYSTEM),
+			       sock + SOCK_MS_SYSTEM);
+			writel(0, sock->addr + SOCK_MS_DATA);
+			dev_dbg(&sock->dev, "writing %x\n", 0);
+
+		} else
+			writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM);
+
+		cmd_mask = readl(sock->addr + SOCK_MS_SYSTEM);
+		cmd_mask &= ~TIFM_MS_SYS_DATA;
+		cmd_mask |= TIFM_MS_SYS_NOT_RDY;
+		dev_dbg(&sock->dev, "mask %x\n", cmd_mask);
+		writel(cmd_mask, sock->addr + SOCK_MS_SYSTEM);
+	} else
+		BUG();
+
+	mod_timer(&host->timer, jiffies + host->timeout_jiffies);
+	writel(TIFM_CTRL_LED | readl(sock->addr + SOCK_CONTROL),
+	       sock->addr + SOCK_CONTROL);
+	host->req->error = 0;
+
+	cmd = (host->req->tpc & 0xf) << 12;
+	cmd |= data_len;
+	writel(cmd, sock->addr + SOCK_MS_COMMAND);
+
+	dev_dbg(&sock->dev, "executing TPC %x, %x\n", cmd, cmd_mask);
+	return 0;
+}
+
+static void tifm_ms_complete_cmd(struct tifm_ms *host)
+{
+	struct tifm_dev *sock = host->dev;
+	struct memstick_host *msh = tifm_get_drvdata(sock);
+	unsigned int tval = 0, data_len;
+	unsigned char *data;
+	int rc;
+
+	del_timer(&host->timer);
+	if (host->req->io_type == MEMSTICK_IO_SG) {
+		if (!host->no_dma)
+			tifm_unmap_sg(sock, &host->req->sg, 1,
+				      host->req->data_dir == READ
+				      ? PCI_DMA_FROMDEVICE
+				      : PCI_DMA_TODEVICE);
+	} else if (host->req->io_type == MEMSTICK_IO_VAL) {
+		writel(~TIFM_MS_SYS_DATA & readl(sock->addr + SOCK_MS_SYSTEM),
+		       sock->addr + SOCK_MS_SYSTEM);
+
+		data = host->req->data;
+		data_len = host->req->data_len;
+
+		if (host->req->data_dir == READ) {
+			for (rc = 0; (data_len - rc) >= 4; rc += 4)
+				*(int *)(data + rc)
+					= __raw_readl(sock->addr
+						      + SOCK_MS_DATA);
+
+			if (data_len - rc)
+				tval = readl(sock->addr + SOCK_MS_DATA);
+			switch (data_len - rc) {
+			case 3:
+				data[rc + 2] = (tval >> 16) & 0xff;
+			case 2:
+				data[rc + 1] = (tval >> 8) & 0xff;
+			case 1:
+				data[rc] = tval & 0xff;
+			}
+			readl(sock->addr + SOCK_MS_DATA);
+		}
+	}
+
+	writel((~TIFM_CTRL_LED) & readl(sock->addr + SOCK_CONTROL),
+	       sock->addr + SOCK_CONTROL);
+
+	do {
+		rc = memstick_next_req(msh, &host->req);
+	} while (!rc && tifm_ms_issue_cmd(host));
+}
+
+static int tifm_ms_check_status(struct tifm_ms *host)
+{
+	if (!host->req->error) {
+		if (!(host->cmd_flags & CMD_READY))
+			return 1;
+		if ((host->req->io_type == MEMSTICK_IO_SG)
+		    && !(host->cmd_flags & FIFO_READY))
+			return 1;
+		if (host->req->need_card_int
+		    && !(host->cmd_flags & CARD_READY))
+			return 1;
+	}
+	return 0;
+}
+
+/* Called from interrupt handler */
+static void tifm_ms_data_event(struct tifm_dev *sock)
+{
+	struct tifm_ms *host;
+	unsigned int fifo_status = 0;
+	int rc = 1;
+
+	spin_lock(&sock->lock);
+	host = memstick_priv((struct memstick_host *)tifm_get_drvdata(sock));
+	fifo_status = readl(sock->addr + SOCK_DMA_FIFO_STATUS);
+	dev_dbg(&sock->dev, "data event: fifo_status %x, flags %x\n",
+		fifo_status, host->cmd_flags);
+
+	if (host->req) {
+		if (fifo_status & TIFM_FIFO_READY) {
+			if (!host->no_dma || tifm_ms_transfer_data(host, 0)) {
+				host->cmd_flags |= FIFO_READY;
+				rc = tifm_ms_check_status(host);
+			}
+		}
+	}
+
+	writel(fifo_status, sock->addr + SOCK_DMA_FIFO_STATUS);
+	if (!rc)
+		tifm_ms_complete_cmd(host);
+
+	spin_unlock(&sock->lock);
+}
+
+
+/* Called from interrupt handler */
+static void tifm_ms_card_event(struct tifm_dev *sock)
+{
+	struct tifm_ms *host;
+	unsigned int host_status = 0;
+	int rc = 1;
+
+	spin_lock(&sock->lock);
+	host = memstick_priv((struct memstick_host *)tifm_get_drvdata(sock));
+	host_status = readl(sock->addr + SOCK_MS_STATUS);
+	dev_dbg(&sock->dev, "host event: host_status %x, flags %x\n",
+		host_status, host->cmd_flags);
+
+	if (host->req) {
+		if (host_status & TIFM_MS_TIMEOUT)
+			host->req->error = -ETIME;
+		else if (host_status & TIFM_MS_BADCRC)
+			host->req->error = -EILSEQ;
+
+		if (host->req->error) {
+			writel(TIFM_FIFO_INT_SETALL,
+			       sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR);
+			writel(TIFM_DMA_RESET, sock->addr + SOCK_DMA_CONTROL);
+		}
+
+		if (host_status & TIFM_MS_EOTPC)
+			host->cmd_flags |= CMD_READY;
+		if (host_status & TIFM_MS_INT)
+			host->cmd_flags |= CARD_READY;
+
+		rc = tifm_ms_check_status(host);
+
+	}
+
+	writel(TIFM_MS_SYS_NOT_RDY | readl(sock->addr + SOCK_MS_SYSTEM),
+	       sock->addr + SOCK_MS_SYSTEM);
+	writel((~TIFM_MS_SYS_DATA) & readl(sock->addr + SOCK_MS_SYSTEM),
+	       sock->addr + SOCK_MS_SYSTEM);
+
+	if (!rc)
+		tifm_ms_complete_cmd(host);
+
+	spin_unlock(&sock->lock);
+	return;
+}
+
+static void tifm_ms_request(struct memstick_host *msh)
+{
+	struct tifm_ms *host = memstick_priv(msh);
+	struct tifm_dev *sock = host->dev;
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&sock->lock, flags);
+	if (host->req) {
+		printk(KERN_ERR "%s : unfinished request detected\n",
+		       sock->dev.bus_id);
+		spin_unlock_irqrestore(&sock->lock, flags);
+		tifm_eject(host->dev);
+		return;
+	}
+
+	if (host->eject) {
+		do {
+			rc = memstick_next_req(msh, &host->req);
+			if (!rc)
+				host->req->error = -ETIME;
+		} while (!rc);
+		spin_unlock_irqrestore(&sock->lock, flags);
+		return;
+	}
+
+	do {
+		rc = memstick_next_req(msh, &host->req);
+	} while (!rc && tifm_ms_issue_cmd(host));
+
+	spin_unlock_irqrestore(&sock->lock, flags);
+	return;
+}
+
+static void tifm_ms_set_param(struct memstick_host *msh,
+			      enum memstick_param param,
+			      int value)
+{
+	struct tifm_ms *host = memstick_priv(msh);
+	struct tifm_dev *sock = host->dev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sock->lock, flags);
+
+	switch (param) {
+	case MEMSTICK_POWER:
+		/* this is set by card detection mechanism */
+		break;
+	case MEMSTICK_INTERFACE:
+		if (value == MEMSTICK_SERIAL) {
+			host->mode_mask = TIFM_MS_SERIAL;
+			writel((~TIFM_CTRL_FAST_CLK)
+			       & readl(sock->addr + SOCK_CONTROL),
+			       sock->addr + SOCK_CONTROL);
+		} else if (value == MEMSTICK_PARALLEL) {
+			host->mode_mask = 0;
+			writel(TIFM_CTRL_FAST_CLK
+			       | readl(sock->addr + SOCK_CONTROL),
+			       sock->addr + SOCK_CONTROL);
+		}
+		break;
+	};
+
+	spin_unlock_irqrestore(&sock->lock, flags);
+}
+
+static void tifm_ms_abort(unsigned long data)
+{
+	struct tifm_ms *host = (struct tifm_ms *)data;
+
+	dev_dbg(&host->dev->dev, "status %x\n",
+		readl(host->dev->addr + SOCK_MS_STATUS));
+	printk(KERN_ERR
+	       "%s : card failed to respond for a long period of time "
+	       "(%x, %x)\n",
+	       host->dev->dev.bus_id, host->req ? host->req->tpc : 0,
+	       host->cmd_flags);
+
+	tifm_eject(host->dev);
+}
+
+static int tifm_ms_initialize_host(struct tifm_ms *host)
+{
+	struct tifm_dev *sock = host->dev;
+	struct memstick_host *msh = tifm_get_drvdata(sock);
+
+	host->mode_mask = TIFM_MS_SERIAL;
+	writel(0x8000, sock->addr + SOCK_MS_SYSTEM);
+	writel(0x0200 | TIFM_MS_SYS_NOT_RDY, sock->addr + SOCK_MS_SYSTEM);
+	writel(0xffffffff, sock->addr + SOCK_MS_STATUS);
+	if (tifm_has_ms_pif(sock))
+		msh->caps |= MEMSTICK_CAP_PARALLEL;
+
+	return 0;
+}
+
+static int tifm_ms_probe(struct tifm_dev *sock)
+{
+	struct memstick_host *msh;
+	struct tifm_ms *host;
+	int rc = -EIO;
+
+	if (!(TIFM_SOCK_STATE_OCCUPIED
+	      & readl(sock->addr + SOCK_PRESENT_STATE))) {
+		printk(KERN_WARNING "%s : card gone, unexpectedly\n",
+		       sock->dev.bus_id);
+		return rc;
+	}
+
+	msh = memstick_alloc_host(sizeof(struct tifm_ms), &sock->dev);
+	if (!msh)
+		return -ENOMEM;
+
+	host = memstick_priv(msh);
+	tifm_set_drvdata(sock, msh);
+	host->dev = sock;
+	host->timeout_jiffies = msecs_to_jiffies(1000);
+	host->no_dma = no_dma;
+
+	setup_timer(&host->timer, tifm_ms_abort, (unsigned long)host);
+
+	msh->request = tifm_ms_request;
+	msh->set_param = tifm_ms_set_param;
+	sock->card_event = tifm_ms_card_event;
+	sock->data_event = tifm_ms_data_event;
+	rc = tifm_ms_initialize_host(host);
+
+	if (!rc)
+		rc = memstick_add_host(msh);
+	if (!rc)
+		return 0;
+
+	memstick_free_host(msh);
+	return rc;
+}
+
+static void tifm_ms_remove(struct tifm_dev *sock)
+{
+	struct memstick_host *msh = tifm_get_drvdata(sock);
+	struct tifm_ms *host = memstick_priv(msh);
+	int rc = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sock->lock, flags);
+	host->eject = 1;
+	if (host->req) {
+		del_timer(&host->timer);
+		writel(TIFM_FIFO_INT_SETALL,
+		       sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR);
+		writel(TIFM_DMA_RESET, sock->addr + SOCK_DMA_CONTROL);
+		if ((host->req->io_type == MEMSTICK_IO_SG) && !host->no_dma)
+			tifm_unmap_sg(sock, &host->req->sg, 1,
+				      host->req->data_dir == READ
+				      ? PCI_DMA_TODEVICE
+				      : PCI_DMA_FROMDEVICE);
+		host->req->error = -ETIME;
+
+		do {
+			rc = memstick_next_req(msh, &host->req);
+			if (!rc)
+				host->req->error = -ETIME;
+		} while (!rc);
+	}
+	spin_unlock_irqrestore(&sock->lock, flags);
+
+	memstick_remove_host(msh);
+
+	writel(0x0200 | TIFM_MS_SYS_NOT_RDY, sock->addr + SOCK_MS_SYSTEM);
+	writel(0xffffffff, sock->addr + SOCK_MS_STATUS);
+
+	memstick_free_host(msh);
+}
+
+#ifdef CONFIG_PM
+
+static int tifm_ms_suspend(struct tifm_dev *sock, pm_message_t state)
+{
+	return 0;
+}
+
+static int tifm_ms_resume(struct tifm_dev *sock)
+{
+	struct memstick_host *msh = tifm_get_drvdata(sock);
+	struct tifm_ms *host = memstick_priv(msh);
+
+	tifm_ms_initialize_host(host);
+	memstick_detect_change(msh);
+
+	return 0;
+}
+
+#else
+
+#define tifm_ms_suspend NULL
+#define tifm_ms_resume NULL
+
+#endif /* CONFIG_PM */
+
+static struct tifm_device_id tifm_ms_id_tbl[] = {
+	{ TIFM_TYPE_MS }, { 0 }
+};
+
+static struct tifm_driver tifm_ms_driver = {
+	.driver = {
+		.name  = DRIVER_NAME,
+		.owner = THIS_MODULE
+	},
+	.id_table = tifm_ms_id_tbl,
+	.probe    = tifm_ms_probe,
+	.remove   = tifm_ms_remove,
+	.suspend  = tifm_ms_suspend,
+	.resume   = tifm_ms_resume
+};
+
+static int __init tifm_ms_init(void)
+{
+	return tifm_register_driver(&tifm_ms_driver);
+}
+
+static void __exit tifm_ms_exit(void)
+{
+	tifm_unregister_driver(&tifm_ms_driver);
+}
+
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia MemoryStick driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(tifm, tifm_ms_id_tbl);
+MODULE_VERSION(DRIVER_VERSION);
+
+module_init(tifm_ms_init);
+module_exit(tifm_ms_exit);
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
index 54380da343a5..63a089b29545 100644
--- a/drivers/misc/tifm_7xx1.c
+++ b/drivers/misc/tifm_7xx1.c
@@ -302,6 +302,21 @@ static int tifm_7xx1_resume(struct pci_dev *dev)
 
 #endif /* CONFIG_PM */
 
+static int tifm_7xx1_dummy_has_ms_pif(struct tifm_adapter *fm,
+				      struct tifm_dev *sock)
+{
+	return 0;
+}
+
+static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock)
+{
+	if (((fm->num_sockets == 4) && (sock->socket_id == 2))
+	    || ((fm->num_sockets == 2) && (sock->socket_id == 0)))
+		return 1;
+
+	return 0;
+}
+
 static int tifm_7xx1_probe(struct pci_dev *dev,
 			   const struct pci_device_id *dev_id)
 {
@@ -336,6 +351,7 @@ static int tifm_7xx1_probe(struct pci_dev *dev,
 
 	INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media);
 	fm->eject = tifm_7xx1_eject;
+	fm->has_ms_pif = tifm_7xx1_has_ms_pif;
 	pci_set_drvdata(dev, fm);
 
 	fm->addr = ioremap(pci_resource_start(dev, 0),
@@ -377,6 +393,7 @@ static void tifm_7xx1_remove(struct pci_dev *dev)
 	int cnt;
 
 	fm->eject = tifm_7xx1_dummy_eject;
+	fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif;
 	writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
 	mmiowb();
 	free_irq(dev->irq, fm);
diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c
index 97544052e768..82dc72a1484f 100644
--- a/drivers/misc/tifm_core.c
+++ b/drivers/misc/tifm_core.c
@@ -284,6 +284,13 @@ void tifm_eject(struct tifm_dev *sock)
 }
 EXPORT_SYMBOL(tifm_eject);
 
+int tifm_has_ms_pif(struct tifm_dev *sock)
+{
+	struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent);
+	return fm->has_ms_pif(fm, sock);
+}
+EXPORT_SYMBOL(tifm_has_ms_pif);
+
 int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
 		int direction)
 {
diff --git a/include/linux/memstick.h b/include/linux/memstick.h
new file mode 100644
index 000000000000..334d059d6794
--- /dev/null
+++ b/include/linux/memstick.h
@@ -0,0 +1,299 @@
+/*
+ *  Sony MemoryStick support
+ *
+ *  Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _MEMSTICK_H
+#define _MEMSTICK_H
+
+#include <linux/workqueue.h>
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+
+/*** Hardware based structures ***/
+
+struct ms_status_register {
+	unsigned char reserved;
+	unsigned char interrupt;
+#define MEMSTICK_INT_CMDNAK             0x0001
+#define MEMSTICK_INT_BREQ               0x0020
+#define MEMSTICK_INT_ERR                0x0040
+#define MEMSTICK_INT_CED                0x0080
+
+	unsigned char status0;
+#define MEMSTICK_STATUS0_WP             0x0001
+#define MEMSTICK_STATUS0_SL             0x0002
+#define MEMSTICK_STATUS0_BF             0x0010
+#define MEMSTICK_STATUS0_BE             0x0020
+#define MEMSTICK_STATUS0_FB0            0x0040
+#define MEMSTICK_STATUS0_MB             0x0080
+
+	unsigned char status1;
+#define MEMSTICK_STATUS1_UCFG           0x0001
+#define MEMSTICK_STATUS1_FGER           0x0002
+#define MEMSTICK_STATUS1_UCEX           0x0004
+#define MEMSTICK_STATUS1_EXER           0x0008
+#define MEMSTICK_STATUS1_UCDT           0x0010
+#define MEMSTICK_STATUS1_DTER           0x0020
+#define MEMSTICK_STATUS1_FBI            0x0040
+#define MEMSTICK_STATUS1_MB             0x0080
+} __attribute__((packed));
+
+struct ms_id_register {
+	unsigned char type;
+	unsigned char reserved;
+	unsigned char category;
+	unsigned char class;
+} __attribute__((packed));
+
+struct ms_param_register {
+	unsigned char system;
+	unsigned char block_address_msb;
+	unsigned short block_address;
+	unsigned char cp;
+#define MEMSTICK_CP_BLOCK               0x0000
+#define MEMSTICK_CP_PAGE                0x0020
+#define MEMSTICK_CP_EXTRA               0x0040
+#define MEMSTICK_CP_OVERWRITE           0x0080
+
+	unsigned char page_address;
+} __attribute__((packed));
+
+struct ms_extra_data_register {
+	unsigned char  overwrite_flag;
+#define MEMSTICK_OVERWRITE_UPDATA       0x0010
+#define MEMSTICK_OVERWRITE_PAGE         0x0060
+#define MEMSTICK_OVERWRITE_BLOCK        0x0080
+
+	unsigned char  management_flag;
+#define MEMSTICK_MANAGEMENT_SYSTEM      0x0004
+#define MEMSTICK_MANAGEMENT_TRANS_TABLE 0x0008
+#define MEMSTICK_MANAGEMENT_COPY        0x0010
+#define MEMSTICK_MANAGEMENT_ACCESS      0x0020
+
+	unsigned short logical_address;
+} __attribute__((packed));
+
+struct ms_register {
+	struct ms_status_register     status;
+	struct ms_id_register         id;
+	unsigned char                 reserved[8];
+	struct ms_param_register      param;
+	struct ms_extra_data_register extra_data;
+} __attribute__((packed));
+
+struct mspro_param_register {
+	unsigned char  system;
+	unsigned short data_count;
+	unsigned int   data_address;
+	unsigned char  cmd_param;
+} __attribute__((packed));
+
+struct mspro_register {
+	struct ms_status_register    status;
+	struct ms_id_register        id;
+	unsigned char                reserved[8];
+	struct mspro_param_register  param;
+} __attribute__((packed));
+
+struct ms_register_addr {
+	unsigned char r_offset;
+	unsigned char r_length;
+	unsigned char w_offset;
+	unsigned char w_length;
+} __attribute__((packed));
+
+enum {
+	MS_TPC_READ_LONG_DATA   = 0x02,
+	MS_TPC_READ_SHORT_DATA  = 0x03,
+	MS_TPC_READ_REG         = 0x04,
+	MS_TPC_READ_IO_DATA     = 0x05, /* unverified */
+	MS_TPC_GET_INT          = 0x07,
+	MS_TPC_SET_RW_REG_ADRS  = 0x08,
+	MS_TPC_EX_SET_CMD       = 0x09,
+	MS_TPC_WRITE_IO_DATA    = 0x0a, /* unverified */
+	MS_TPC_WRITE_REG        = 0x0b,
+	MS_TPC_WRITE_SHORT_DATA = 0x0c,
+	MS_TPC_WRITE_LONG_DATA  = 0x0d,
+	MS_TPC_SET_CMD          = 0x0e
+};
+
+enum {
+	MS_CMD_BLOCK_END     = 0x33,
+	MS_CMD_RESET         = 0x3c,
+	MS_CMD_BLOCK_WRITE   = 0x55,
+	MS_CMD_SLEEP         = 0x5a,
+	MS_CMD_BLOCK_ERASE   = 0x99,
+	MS_CMD_BLOCK_READ    = 0xaa,
+	MS_CMD_CLEAR_BUF     = 0xc3,
+	MS_CMD_FLASH_STOP    = 0xcc,
+	MSPRO_CMD_FORMAT     = 0x10,
+	MSPRO_CMD_SLEEP      = 0x11,
+	MSPRO_CMD_READ_DATA  = 0x20,
+	MSPRO_CMD_WRITE_DATA = 0x21,
+	MSPRO_CMD_READ_ATRB  = 0x24,
+	MSPRO_CMD_STOP       = 0x25,
+	MSPRO_CMD_ERASE      = 0x26,
+	MSPRO_CMD_SET_IBA    = 0x46,
+	MSPRO_CMD_SET_IBD    = 0x47
+/*
+	MSPRO_CMD_RESET
+	MSPRO_CMD_WAKEUP
+	MSPRO_CMD_IN_IO_DATA
+	MSPRO_CMD_OUT_IO_DATA
+	MSPRO_CMD_READ_IO_ATRB
+	MSPRO_CMD_IN_IO_FIFO
+	MSPRO_CMD_OUT_IO_FIFO
+	MSPRO_CMD_IN_IOM
+	MSPRO_CMD_OUT_IOM
+*/
+};
+
+/*** Driver structures and functions ***/
+
+#define MEMSTICK_PART_SHIFT 3
+
+enum memstick_param { MEMSTICK_POWER = 1, MEMSTICK_INTERFACE };
+
+#define MEMSTICK_POWER_OFF 0
+#define MEMSTICK_POWER_ON  1
+
+#define MEMSTICK_SERIAL   0
+#define MEMSTICK_PARALLEL 1
+
+struct memstick_host;
+struct memstick_driver;
+
+#define MEMSTICK_MATCH_ALL            0x01
+
+#define MEMSTICK_TYPE_LEGACY          0xff
+#define MEMSTICK_TYPE_DUO             0x00
+#define MEMSTICK_TYPE_PRO             0x01
+
+#define MEMSTICK_CATEGORY_STORAGE     0xff
+#define MEMSTICK_CATEGORY_STORAGE_DUO 0x00
+
+#define MEMSTICK_CLASS_GENERIC        0xff
+#define MEMSTICK_CLASS_GENERIC_DUO    0x00
+
+
+struct memstick_device_id {
+	unsigned char match_flags;
+	unsigned char type;
+	unsigned char category;
+	unsigned char class;
+};
+
+struct memstick_request {
+	unsigned char tpc;
+	unsigned char data_dir:1,
+		      need_card_int:1,
+		      get_int_reg:1,
+		      io_type:2;
+#define               MEMSTICK_IO_NONE 0
+#define               MEMSTICK_IO_VAL  1
+#define               MEMSTICK_IO_SG   2
+
+	unsigned char int_reg;
+	int           error;
+	union {
+		struct scatterlist sg;
+		struct {
+			unsigned char data_len;
+			unsigned char data[15];
+		};
+	};
+};
+
+struct memstick_dev {
+	struct memstick_device_id id;
+	struct memstick_host     *host;
+	struct ms_register_addr  reg_addr;
+	struct completion        mrq_complete;
+	struct memstick_request  current_mrq;
+
+	/* Check that media driver is still willing to operate the device. */
+	int                      (*check)(struct memstick_dev *card);
+	/* Get next request from the media driver.                         */
+	int                      (*next_request)(struct memstick_dev *card,
+						 struct memstick_request **mrq);
+
+	struct device            dev;
+};
+
+struct memstick_host {
+	struct mutex        lock;
+	unsigned int        id;
+	unsigned int        caps;
+#define MEMSTICK_CAP_PARALLEL      1
+#define MEMSTICK_CAP_AUTO_GET_INT  2
+
+	struct work_struct  media_checker;
+	struct class_device cdev;
+
+	struct memstick_dev *card;
+	unsigned int        retries;
+
+	/* Notify the host that some requests are pending. */
+	void                (*request)(struct memstick_host *host);
+	/* Set host IO parameters (power, clock, etc).     */
+	void                (*set_param)(struct memstick_host *host,
+					 enum memstick_param param,
+					 int value);
+	unsigned long       private[0] ____cacheline_aligned;
+};
+
+struct memstick_driver {
+	struct memstick_device_id *id_table;
+	int                       (*probe)(struct memstick_dev *card);
+	void                      (*remove)(struct memstick_dev *card);
+	int                       (*suspend)(struct memstick_dev *card,
+					     pm_message_t state);
+	int                       (*resume)(struct memstick_dev *card);
+
+	struct device_driver      driver;
+};
+
+int memstick_register_driver(struct memstick_driver *drv);
+void memstick_unregister_driver(struct memstick_driver *drv);
+
+struct memstick_host *memstick_alloc_host(unsigned int extra,
+					  struct device *dev);
+
+int memstick_add_host(struct memstick_host *host);
+void memstick_remove_host(struct memstick_host *host);
+void memstick_free_host(struct memstick_host *host);
+void memstick_detect_change(struct memstick_host *host);
+
+void memstick_init_req_sg(struct memstick_request *mrq, unsigned char tpc,
+			  struct scatterlist *sg);
+void memstick_init_req(struct memstick_request *mrq, unsigned char tpc,
+		       void *buf, size_t length);
+int memstick_next_req(struct memstick_host *host,
+		      struct memstick_request **mrq);
+void memstick_new_req(struct memstick_host *host);
+
+int memstick_set_rw_addr(struct memstick_dev *card);
+
+static inline void *memstick_priv(struct memstick_host *host)
+{
+	return (void *)host->private;
+}
+
+static inline void *memstick_get_drvdata(struct memstick_dev *card)
+{
+	return dev_get_drvdata(&card->dev);
+}
+
+static inline void memstick_set_drvdata(struct memstick_dev *card, void *data)
+{
+	dev_set_drvdata(&card->dev, data);
+}
+
+#endif
diff --git a/include/linux/tifm.h b/include/linux/tifm.h
index 2096b76d0cee..da76ed85f595 100644
--- a/include/linux/tifm.h
+++ b/include/linux/tifm.h
@@ -72,6 +72,7 @@ enum {
 #define TIFM_FIFO_READY           0x00000001
 #define TIFM_FIFO_INT_SETALL      0x0000ffff
 #define TIFM_FIFO_INTMASK         0x00000005
+#define TIFM_FIFO_SIZE            0x00000200
 
 #define TIFM_DMA_RESET            0x00000002
 #define TIFM_DMA_TX               0x00008000
@@ -124,6 +125,8 @@ struct tifm_adapter {
 
 	void                (*eject)(struct tifm_adapter *fm,
 				     struct tifm_dev *sock);
+	int                 (*has_ms_pif)(struct tifm_adapter *fm,
+					  struct tifm_dev *sock);
 
 	struct tifm_dev     *sockets[0];
 };
@@ -141,6 +144,7 @@ struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id,
 int tifm_register_driver(struct tifm_driver *drv);
 void tifm_unregister_driver(struct tifm_driver *drv);
 void tifm_eject(struct tifm_dev *sock);
+int tifm_has_ms_pif(struct tifm_dev *sock);
 int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
 		int direction);
 void tifm_unmap_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
-- 
cgit 


From 185c045c245f46485ad8bbd8cc1100e986ff3f13 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sat, 9 Feb 2008 23:24:09 +0100
Subject: x86, core: remove CONFIG_FORCED_INLINING

Other than the defconfigs, remove the entry in compiler-gcc4.h,
Kconfig.debug and feature-removal-schedule.txt.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/feature-removal-schedule.txt |  9 ---------
 arch/x86/configs/i386_defconfig            |  1 -
 arch/x86/configs/x86_64_defconfig          |  1 -
 include/linux/compiler-gcc4.h              |  9 ---------
 lib/Kconfig.debug                          | 14 --------------
 5 files changed, 34 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ce9503c892b5..557f4a2f1655 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -111,15 +111,6 @@ Who:	Christoph Hellwig <hch@lst.de>
 
 ---------------------------
 
-What:	CONFIG_FORCED_INLINING
-When:	June 2006
-Why:	Config option is there to see if gcc is good enough. (in january
-        2006). If it is, the behavior should just be the default. If it's not,
-	the option should just go away entirely.
-Who:    Arjan van de Ven
-
----------------------------
-
 What:   eepro100 network driver
 When:   January 2007
 Why:    replaced by the e100 driver
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 77562e7cdab6..3df340b54e57 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9e2b0ef851de..eef98cb00c62 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 0ab3a3232330..974f5b7bb205 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -5,15 +5,6 @@
 /* These definitions are for GCC v4.x.  */
 #include <linux/compiler-gcc.h>
 
-#ifdef CONFIG_FORCED_INLINING
-# undef inline
-# undef __inline__
-# undef __inline
-# define inline			inline		__attribute__((always_inline))
-# define __inline__		__inline__	__attribute__((always_inline))
-# define __inline		__inline	__attribute__((always_inline))
-#endif
-
 #define __used			__attribute__((__used__))
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ce0bb2600c25..a370fe828a79 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -465,20 +465,6 @@ config FRAME_POINTER
 	  some architectures or if you use external debuggers.
 	  If you don't debug the kernel, you can say N.
 
-config FORCED_INLINING
-	bool "Force gcc to inline functions marked 'inline'"
-	depends on DEBUG_KERNEL
-	default y
-	help
-	  This option determines if the kernel forces gcc to inline the functions
-	  developers have marked 'inline'. Doing so takes away freedom from gcc to
-	  do what it thinks is best, which is desirable for the gcc 3.x series of
-	  compilers. The gcc 4.x series have a rewritten inlining algorithm and
-	  disabling this option will generate a smaller kernel there. Hopefully
-	  this algorithm is so good that allowing gcc4 to make the decision can
-	  become the default in the future, until then this option is there to
-	  test gcc for this.
-
 config BOOT_PRINTK_DELAY
 	bool "Delay each boot printk message by N milliseconds"
 	depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
-- 
cgit 


From 469108ff3dcbc00313699d620c47f3ee1e7d19c6 Mon Sep 17 00:00:00 2001
From: Theodore Tso <tytso@MIT.EDU>
Date: Sun, 10 Feb 2008 01:11:44 -0500
Subject: ext4: Add new "development flag" to the ext4 filesystem

This flag is simply a generic "this is a crash/burn test filesystem"
marker.  If it is set, then filesystem code which is "in development"
will be allowed to mount the filesystem.  Filesystem code which is not
considered ready for prime-time will check for this flag, and if it is
not set, it will refuse to touch the filesystem.

As we start rolling ext4 out to distro's like Fedora, et. al, this makes
it less likely that a user might accidentally start using ext4 on a
production filesystem; a bad thing, since that will essentially make it
be unfsckable until e2fsprogs catches up.

Signed-off-by: Theodore Tso <tytso@MIT.EDU>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext4/super.c         | 11 +++++++++++
 include/linux/ext4_fs.h |  7 +++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 93beb865c20d..0072da75221f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1919,6 +1919,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		printk(KERN_WARNING
 		       "EXT4-fs warning: feature flags set on rev 0 fs, "
 		       "running e2fsck is recommended\n");
+
+	/*
+	 * Since ext4 is still considered development code, we require
+	 * that the TEST_FILESYS flag in s->flags be set.
+	 */
+	if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
+		printk(KERN_WARNING "EXT4-fs: %s: not marked "
+		       "OK to use with test code.\n", sb->s_id);
+		goto failed_mount;
+	}
+
 	/*
 	 * Check feature flags regardless of the revision level, since we
 	 * previously didn't change the revision level when setting the flags,
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index c4f635a4dd25..250032548597 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -489,6 +489,13 @@ do {									       \
 #define	EXT4_ERROR_FS			0x0002	/* Errors detected */
 #define	EXT4_ORPHAN_FS			0x0004	/* Orphans being recovered */
 
+/*
+ * Misc. filesystem flags
+ */
+#define EXT2_FLAGS_SIGNED_HASH		0x0001  /* Signed dirhash in use */
+#define EXT2_FLAGS_UNSIGNED_HASH	0x0002  /* Unsigned dirhash in use */
+#define EXT2_FLAGS_TEST_FILESYS		0x0004	/* to test development code */
+
 /*
  * Mount flags
  */
-- 
cgit 


From e13a2e61dd5152f5499d2003470acf9c838eab84 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Sun, 10 Feb 2008 10:48:03 +0100
Subject: ntp: correct inconsistent interval/tick_length usage

clocksource initialization and error accumulation.  This corrects a 280ppm
drift seen on some systems using acpi_pm, and affects other clocksources as
well (likely to a lesser degree).

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 9 ++++++++-
 kernel/time/ntp.c     | 4 ----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 8ea3e71ba7fa..c3f374786a43 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -232,7 +232,14 @@ static inline int ntp_synced(void)
 #else
 #define NTP_INTERVAL_FREQ  (HZ)
 #endif
-#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
+
+#define CLOCK_TICK_OVERFLOW	(LATCH * HZ - CLOCK_TICK_RATE)
+#define CLOCK_TICK_ADJUST	(((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \
+					(s64)CLOCK_TICK_RATE)
+
+/* Because using NSEC_PER_SEC would be too easy */
+#define NTP_INTERVAL_LENGTH ((((s64)TICK_USEC * NSEC_PER_USEC * USER_HZ) + \
+			      CLOCK_TICK_ADJUST) / NTP_INTERVAL_FREQ)
 
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
 extern u64 current_tick_length(void);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index e64efaf957e8..c88b5910e7ab 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -43,10 +43,6 @@ long time_freq;				/* frequency offset (scaled ppm)*/
 static long time_reftime;		/* time at last adjustment (s)	*/
 long time_adjust;
 
-#define CLOCK_TICK_OVERFLOW	(LATCH * HZ - CLOCK_TICK_RATE)
-#define CLOCK_TICK_ADJUST	(((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \
-					(s64)CLOCK_TICK_RATE)
-
 static void ntp_update_frequency(void)
 {
 	u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
-- 
cgit 


From 080344b98805553f9b01de0f59a41b1533036d8d Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 1 Feb 2008 17:29:05 +0300
Subject: hrtimer: fix *rmtp handling in hrtimer_nanosleep()

Spotted by Pavel Emelyanov and Alexey Dobriyan.

hrtimer_nanosleep() sets restart_block->arg1 = rmtp, but this rmtp points to
the local variable which lives in the caller's stack frame. This means that
if sys_restart_syscall() actually happens and it is interrupted as well, we
don't update the user-space variable, but write into the already dead stack
frame.

Introduced by commit 04c227140fed77587432667a574b14736a06dd7f
hrtimer: Rework hrtimer_nanosleep to make sys_compat_nanosleep easier

Change the callers to pass "__user *rmtp" to hrtimer_nanosleep(), and change
hrtimer_nanosleep() to use copy_to_user() to actually update *rmtp.

Small problem remains. man 2 nanosleep states that *rtmp should be written if
nanosleep() was interrupted (it says nothing whether it is OK to update *rmtp
if nanosleep returns 0), but (with or without this patch) we can dirty *rem
even if nanosleep() returns 0.

NOTE: this patch doesn't change compat_sys_nanosleep(), because it has other
bugs. Fixed by the next patch.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Alexey Dobriyan <adobriyan@sw.ru>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Pavel Emelyanov <xemul@sw.ru>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Toyo Abe <toyoa@mvista.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

 include/linux/hrtimer.h |    2 -
 kernel/hrtimer.c        |   51 +++++++++++++++++++++++++-----------------------
 kernel/posix-timers.c   |   14 +------------
 3 files changed, 30 insertions(+), 37 deletions(-)
---
 include/linux/hrtimer.h |  2 +-
 kernel/hrtimer.c        | 51 ++++++++++++++++++++++++++-----------------------
 kernel/posix-timers.c   | 17 +++--------------
 3 files changed, 31 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 600fc3bcf63e..1ad56a7b2f74 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -316,7 +316,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer,
 
 /* Precise sleep: */
 extern long hrtimer_nanosleep(struct timespec *rqtp,
-			      struct timespec *rmtp,
+			      struct timespec __user *rmtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 extern long hrtimer_nanosleep_restart(struct restart_block *restart_block);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 668f3967eb39..355085f0896e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1319,11 +1319,26 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 	return t->task == NULL;
 }
 
+static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
+{
+	struct timespec rmt;
+	ktime_t rem;
+
+	rem = ktime_sub(timer->expires, timer->base->get_time());
+	if (rem.tv64 <= 0)
+		return 0;
+	rmt = ktime_to_timespec(rem);
+
+	if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
+		return -EFAULT;
+
+	return 1;
+}
+
 long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 {
 	struct hrtimer_sleeper t;
-	struct timespec *rmtp;
-	ktime_t time;
+	struct timespec __user  *rmtp;
 
 	restart->fn = do_no_restart_syscall;
 
@@ -1333,12 +1348,11 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	if (do_nanosleep(&t, HRTIMER_MODE_ABS))
 		return 0;
 
-	rmtp = (struct timespec *)restart->arg1;
+	rmtp = (struct timespec __user *)restart->arg1;
 	if (rmtp) {
-		time = ktime_sub(t.timer.expires, t.timer.base->get_time());
-		if (time.tv64 <= 0)
-			return 0;
-		*rmtp = ktime_to_timespec(time);
+		int ret = update_rmtp(&t.timer, rmtp);
+		if (ret <= 0)
+			return ret;
 	}
 
 	restart->fn = hrtimer_nanosleep_restart;
@@ -1347,12 +1361,11 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	return -ERESTART_RESTARTBLOCK;
 }
 
-long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp,
+long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
 {
 	struct restart_block *restart;
 	struct hrtimer_sleeper t;
-	ktime_t rem;
 
 	hrtimer_init(&t.timer, clockid, mode);
 	t.timer.expires = timespec_to_ktime(*rqtp);
@@ -1364,10 +1377,9 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp,
 		return -ERESTARTNOHAND;
 
 	if (rmtp) {
-		rem = ktime_sub(t.timer.expires, t.timer.base->get_time());
-		if (rem.tv64 <= 0)
-			return 0;
-		*rmtp = ktime_to_timespec(rem);
+		int ret = update_rmtp(&t.timer, rmtp);
+		if (ret <= 0)
+			return ret;
 	}
 
 	restart = &current_thread_info()->restart_block;
@@ -1383,8 +1395,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp,
 asmlinkage long
 sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
 {
-	struct timespec tu, rmt;
-	int ret;
+	struct timespec tu;
 
 	if (copy_from_user(&tu, rqtp, sizeof(tu)))
 		return -EFAULT;
@@ -1392,15 +1403,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
 	if (!timespec_valid(&tu))
 		return -EINVAL;
 
-	ret = hrtimer_nanosleep(&tu, rmtp ? &rmt : NULL, HRTIMER_MODE_REL,
-				CLOCK_MONOTONIC);
-
-	if (ret && rmtp) {
-		if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
-			return -EFAULT;
-	}
-
-	return ret;
+	return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 }
 
 /*
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index ce268966007d..022c9c3cee6f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -982,20 +982,9 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
 static int common_nsleep(const clockid_t which_clock, int flags,
 			 struct timespec *tsave, struct timespec __user *rmtp)
 {
-	struct timespec rmt;
-	int ret;
-
-	ret = hrtimer_nanosleep(tsave, rmtp ? &rmt : NULL,
-				flags & TIMER_ABSTIME ?
-				HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
-				which_clock);
-
-	if (ret && rmtp) {
-		if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
-			return -EFAULT;
-	}
-
-	return ret;
+	return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
+				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
+				 which_clock);
 }
 
 asmlinkage long
-- 
cgit 


From b6ce068a1285a24185b01be8a49021827516b3e1 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Sun, 10 Feb 2008 09:45:28 -0500
Subject: Change pci_raw_ops to pci_raw_read/write

We want to allow different implementations of pci_raw_ops for standard
and extended config space on x86.  Rather than clutter generic code with
knowledge of this, we make pci_raw_ops private to x86 and use it to
implement the new raw interface -- raw_pci_read() and raw_pci_write().

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/pci/pci.c               | 25 ++++++++-----------------
 arch/ia64/sn/pci/tioce_provider.c | 16 ++++++++--------
 arch/x86/kernel/quirks.c          |  2 +-
 arch/x86/pci/common.c             | 25 +++++++++++++++++++++++--
 arch/x86/pci/direct.c             |  4 ++--
 arch/x86/pci/fixup.c              |  6 ++++--
 arch/x86/pci/legacy.c             |  2 +-
 arch/x86/pci/mmconfig-shared.c    |  6 +++---
 arch/x86/pci/mmconfig_32.c        | 10 ++--------
 arch/x86/pci/mmconfig_64.c        |  8 +-------
 arch/x86/pci/pci.h                | 15 +++++++++++----
 arch/x86/pci/visws.c              |  3 ---
 drivers/acpi/osl.c                | 25 ++++++-------------------
 include/linux/pci.h               | 16 ++++++++--------
 14 files changed, 78 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 488e48a5deea..8fd7e825192b 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -43,8 +43,7 @@
 #define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg)	\
 	(((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg))
 
-static int
-pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
+int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn,
 	      int reg, int len, u32 *value)
 {
 	u64 addr, data = 0;
@@ -68,8 +67,7 @@ pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
 	return 0;
 }
 
-static int
-pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
+int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn,
 	       int reg, int len, u32 value)
 {
 	u64 addr;
@@ -91,24 +89,17 @@ pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
 	return 0;
 }
 
-static struct pci_raw_ops pci_sal_ops = {
-	.read =		pci_sal_read,
-	.write =	pci_sal_write
-};
-
-struct pci_raw_ops *raw_pci_ops = &pci_sal_ops;
-
-static int
-pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+							int size, u32 *value)
 {
-	return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+	return raw_pci_read(pci_domain_nr(bus), bus->number,
 				 devfn, where, size, value);
 }
 
-static int
-pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+							int size, u32 value)
 {
-	return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+	return raw_pci_write(pci_domain_nr(bus), bus->number,
 				  devfn, where, size, value);
 }
 
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e1a3e19d3d9c..999f14f986e2 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -752,13 +752,13 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	 * Determine the secondary bus number of the port2 logical PPB.
 	 * This is used to decide whether a given pci device resides on
 	 * port1 or port2.  Note:  We don't have enough plumbing set up
-	 * here to use pci_read_config_xxx() so use the raw_pci_ops vector.
+	 * here to use pci_read_config_xxx() so use raw_pci_read().
 	 */
 
 	seg = tioce_common->ce_pcibus.bs_persist_segment;
 	bus = tioce_common->ce_pcibus.bs_persist_busnum;
 
-	raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
+	raw_pci_read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
 	tioce_kern->ce_port1_secondary = (u8) tmp;
 
 	/*
@@ -799,11 +799,11 @@ tioce_kern_init(struct tioce_common *tioce_common)
 
 		/* mem base/limit */
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_MEMORY_BASE, 2, &tmp);
 		base = (u64)tmp << 16;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_MEMORY_LIMIT, 2, &tmp);
 		limit = (u64)tmp << 16;
 		limit |= 0xfffffUL;
@@ -817,21 +817,21 @@ tioce_kern_init(struct tioce_common *tioce_common)
 		 * attributes.
 		 */
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_MEMORY_BASE, 2, &tmp);
 		base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_BASE_UPPER32, 4, &tmp);
 		base |= (u64)tmp << 32;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_MEMORY_LIMIT, 2, &tmp);
 
 		limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
 		limit |= 0xfffffUL;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_LIMIT_UPPER32, 4, &tmp);
 		limit |= (u64)tmp << 32;
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 6ba33ca8715a..1941482d4ca3 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -27,7 +27,7 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
 	pci_write_config_byte(dev, 0xf4, config|0x2);
 
 	/* read xTPR register */
-	raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
+	raw_pci_read(0, 0, 0x40, 0x4c, 2, &word);
 
 	if (!(word & (1 << 13))) {
 		dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 52deabc72a6f..b7c67a187b6b 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -26,16 +26,37 @@ int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
 struct pci_raw_ops *raw_pci_ops;
+struct pci_raw_ops *raw_pci_ext_ops;
+
+int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 *val)
+{
+	if (reg < 256 && raw_pci_ops)
+		return raw_pci_ops->read(domain, bus, devfn, reg, len, val);
+	if (raw_pci_ext_ops)
+		return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val);
+	return -EINVAL;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 val)
+{
+	if (reg < 256 && raw_pci_ops)
+		return raw_pci_ops->write(domain, bus, devfn, reg, len, val);
+	if (raw_pci_ext_ops)
+		return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val);
+	return -EINVAL;
+}
 
 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
 {
-	return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+	return raw_pci_read(pci_domain_nr(bus), bus->number,
 				 devfn, where, size, value);
 }
 
 static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
 {
-	return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+	return raw_pci_write(pci_domain_nr(bus), bus->number,
 				  devfn, where, size, value);
 }
 
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 431c9a51b157..42f3e4cad179 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -14,7 +14,7 @@
 #define PCI_CONF1_ADDRESS(bus, devfn, reg) \
 	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
 
-int pci_conf1_read(unsigned int seg, unsigned int bus,
+static int pci_conf1_read(unsigned int seg, unsigned int bus,
 			  unsigned int devfn, int reg, int len, u32 *value)
 {
 	unsigned long flags;
@@ -45,7 +45,7 @@ int pci_conf1_read(unsigned int seg, unsigned int bus,
 	return 0;
 }
 
-int pci_conf1_write(unsigned int seg, unsigned int bus,
+static int pci_conf1_write(unsigned int seg, unsigned int bus,
 			   unsigned int devfn, int reg, int len, u32 value)
 {
 	unsigned long flags;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 74d30ff33c49..a5ef5f551373 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -215,7 +215,8 @@ static int quirk_aspm_offset[MAX_PCIEROOT << 3];
 
 static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
 {
-	return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
+	return raw_pci_read(pci_domain_nr(bus), bus->number,
+						devfn, where, size, value);
 }
 
 /*
@@ -231,7 +232,8 @@ static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int wh
 	if ((offset) && (where == offset))
 		value = value & 0xfffffffc;
 
-	return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
+	return raw_pci_write(pci_domain_nr(bus), bus->number,
+						devfn, where, size, value);
 }
 
 static struct pci_ops quirk_pcie_aspm_ops = {
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 5565d7016b75..e041ced0ce13 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -22,7 +22,7 @@ static void __devinit pcibios_fixup_peer_bridges(void)
 		if (pci_find_bus(0, n))
 			continue;
 		for (devfn = 0; devfn < 256; devfn += 8) {
-			if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
+			if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
 			    l != 0x0000 && l != 0xffff) {
 				DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
 				printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 6b521d389327..8d54df4dfaad 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -28,7 +28,7 @@ static int __initdata pci_mmcfg_resources_inserted;
 static const char __init *pci_mmcfg_e7520(void)
 {
 	u32 win;
-	pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
+	pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
 
 	win = win & 0xf000;
 	if(win == 0x0000 || win == 0xf000)
@@ -53,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void)
 
 	pci_mmcfg_config_num = 1;
 
-	pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
+	pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
 
 	/* Enable bit */
 	if (!(pciexbar & 1))
@@ -118,7 +118,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
 	int i;
 	const char *name;
 
-	pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
+	pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
 	vendor = l & 0xffff;
 	device = (l >> 16) & 0xffff;
 
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 7b75e6513436..081816ada057 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -68,9 +68,6 @@ err:		*value = -1;
 		return -EINVAL;
 	}
 
-	if (reg < 256)
-		return pci_conf1_read(seg,bus,devfn,reg,len,value);
-
 	base = get_base_addr(seg, bus, devfn);
 	if (!base)
 		goto err;
@@ -104,9 +101,6 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 	if ((bus > 255) || (devfn > 255) || (reg > 4095))
 		return -EINVAL;
 
-	if (reg < 256)
-		return pci_conf1_write(seg,bus,devfn,reg,len,value);
-
 	base = get_base_addr(seg, bus, devfn);
 	if (!base)
 		return -EINVAL;
@@ -138,7 +132,7 @@ static struct pci_raw_ops pci_mmcfg = {
 
 int __init pci_mmcfg_arch_init(void)
 {
-	printk(KERN_INFO "PCI: Using MMCONFIG\n");
-	raw_pci_ops = &pci_mmcfg;
+	printk(KERN_INFO "PCI: Using MMCONFIG for extended config space\n");
+	raw_pci_ext_ops = &pci_mmcfg;
 	return 1;
 }
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index c4cf318e44a9..9207fd49233c 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -58,9 +58,6 @@ err:		*value = -1;
 		return -EINVAL;
 	}
 
-	if (reg < 256)
-		return pci_conf1_read(seg,bus,devfn,reg,len,value);
-
 	addr = pci_dev_base(seg, bus, devfn);
 	if (!addr)
 		goto err;
@@ -89,9 +86,6 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 	if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
 		return -EINVAL;
 
-	if (reg < 256)
-		return pci_conf1_write(seg,bus,devfn,reg,len,value);
-
 	addr = pci_dev_base(seg, bus, devfn);
 	if (!addr)
 		return -EINVAL;
@@ -150,6 +144,6 @@ int __init pci_mmcfg_arch_init(void)
 			return 0;
 		}
 	}
-	raw_pci_ops = &pci_mmcfg;
+	raw_pci_ext_ops = &pci_mmcfg;
 	return 1;
 }
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 36cb44c397c3..3431518d921a 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -85,10 +85,17 @@ extern spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
-extern int pci_conf1_write(unsigned int seg, unsigned int bus,
-			   unsigned int devfn, int reg, int len, u32 value);
-extern int pci_conf1_read(unsigned int seg, unsigned int bus,
-			  unsigned int devfn, int reg, int len, u32 *value);
+struct pci_raw_ops {
+	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 *val);
+	int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 val);
+};
+
+extern struct pci_raw_ops *raw_pci_ops;
+extern struct pci_raw_ops *raw_pci_ext_ops;
+
+extern struct pci_raw_ops pci_direct_conf1;
 
 extern int pci_direct_probe(void);
 extern void pci_direct_init(int type);
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 8ecb1c722594..c2df4e97eed6 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -13,9 +13,6 @@
 
 #include "pci.h"
 
-
-extern struct pci_raw_ops pci_direct_conf1;
-
 static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
 static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index a14501c98f40..34b3386dedca 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -200,15 +200,6 @@ acpi_status __init acpi_os_initialize(void)
 
 acpi_status acpi_os_initialize1(void)
 {
-	/*
-	 * Initialize PCI configuration space access, as we'll need to access
-	 * it while walking the namespace (bus 0 and root bridges w/ _BBNs).
-	 */
-	if (!raw_pci_ops) {
-		printk(KERN_ERR PREFIX
-		       "Access to PCI configuration space unavailable\n");
-		return AE_NULL_ENTRY;
-	}
 	kacpid_wq = create_singlethread_workqueue("kacpid");
 	kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify");
 	BUG_ON(!kacpid_wq);
@@ -653,11 +644,9 @@ acpi_os_read_pci_configuration(struct acpi_pci_id * pci_id, u32 reg,
 		return AE_ERROR;
 	}
 
-	BUG_ON(!raw_pci_ops);
-
-	result = raw_pci_ops->read(pci_id->segment, pci_id->bus,
-				   PCI_DEVFN(pci_id->device, pci_id->function),
-				   reg, size, value);
+	result = raw_pci_read(pci_id->segment, pci_id->bus,
+				PCI_DEVFN(pci_id->device, pci_id->function),
+				reg, size, value);
 
 	return (result ? AE_ERROR : AE_OK);
 }
@@ -682,11 +671,9 @@ acpi_os_write_pci_configuration(struct acpi_pci_id * pci_id, u32 reg,
 		return AE_ERROR;
 	}
 
-	BUG_ON(!raw_pci_ops);
-
-	result = raw_pci_ops->write(pci_id->segment, pci_id->bus,
-				    PCI_DEVFN(pci_id->device, pci_id->function),
-				    reg, size, value);
+	result = raw_pci_write(pci_id->segment, pci_id->bus,
+				PCI_DEVFN(pci_id->device, pci_id->function),
+				reg, size, value);
 
 	return (result ? AE_ERROR : AE_OK);
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7215d3b1f4af..87195b62de52 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -301,14 +301,14 @@ struct pci_ops {
 	int (*write)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val);
 };
 
-struct pci_raw_ops {
-	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
-		    int reg, int len, u32 *val);
-	int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
-		     int reg, int len, u32 val);
-};
-
-extern struct pci_raw_ops *raw_pci_ops;
+/*
+ * ACPI needs to be able to access PCI config space before we've done a
+ * PCI bus scan and created pci_bus structures.
+ */
+extern int raw_pci_read(unsigned int domain, unsigned int bus,
+			unsigned int devfn, int reg, int len, u32 *val);
+extern int raw_pci_write(unsigned int domain, unsigned int bus,
+			unsigned int devfn, int reg, int len, u32 val);
 
 struct pci_bus_region {
 	resource_size_t start;
-- 
cgit 


From fbb7878c1a2ee40a1e983bf20f3dd3a80255dcf2 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 7 Feb 2008 23:10:21 -0500
Subject: nfsd: clean up svc_reserve_auth()

This is a void function attempting to return the return value from
another void function, which seems harmless but extremely weird, and
apparently makes some compilers complain.

While we're there, clean up a little (e.g. the switch statement had a
minor style problem and seemed overkill as long as there's only one
case).

Thanks to Trond for noticing this.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svc.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 64c771056187..64c97552964a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -409,16 +409,13 @@ char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
  * for all cases without actually generating the checksum, so we just use a
  * static value.
  */
-static inline void
-svc_reserve_auth(struct svc_rqst *rqstp, int space)
+static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
 {
-	int			added_space = 0;
+	int added_space = 0;
 
-	switch(rqstp->rq_authop->flavour) {
-		case RPC_AUTH_GSS:
-			added_space = RPC_MAX_AUTH_SIZE;
-	}
-	return svc_reserve(rqstp, space + added_space);
+	if (rqstp->rq_authop->flavour)
+		added_space = RPC_MAX_AUTH_SIZE;
+	svc_reserve(rqstp, space + added_space);
 }
 
 #endif /* SUNRPC_SVC_H */
-- 
cgit 


From 8e882ba111bb52fbb42c34a265afb97ddd4fcea1 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Mon, 11 Feb 2008 00:32:14 +0100
Subject: ide: introduce CONFIG_BLK_DEV_IDEDMA_SFF option

Introduce new option CONFIG_BLK_DEV_IDEDMA_SFF for non-PCI SFF-8038i compatible
bus mastering IDE controllers (which there are a few known), thus fixing a hack
made for Palmchip BK3710 controller...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Anton Salnikov <asalnikov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig   |  9 +++++++--
 drivers/ide/ide-dma.c | 14 +++++++-------
 include/linux/ide.h   |  9 ++++-----
 3 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index d2c4f06f53c9..df752e690e47 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -378,6 +378,9 @@ config BLK_DEV_IDEPNP
 	  would like the kernel to automatically detect and activate
 	  it, say Y here.
 
+config BLK_DEV_IDEDMA_SFF
+	bool
+
 if PCI
 
 comment "PCI IDE chipsets support"
@@ -459,6 +462,7 @@ config BLK_DEV_RZ1000
 config BLK_DEV_IDEDMA_PCI
 	bool
 	select BLK_DEV_IDEPCI
+	select BLK_DEV_IDEDMA_SFF
 
 config BLK_DEV_AEC62XX
 	tristate "AEC62XX chipset support"
@@ -999,7 +1003,7 @@ config BLK_DEV_Q40IDE
 config BLK_DEV_PALMCHIP_BK3710
 	tristate "Palmchip bk3710 IDE controller support"
 	depends on ARCH_DAVINCI
-	select BLK_DEV_IDEDMA_PCI
+	select BLK_DEV_IDEDMA_SFF
 	help
 	  Say Y here if you want to support the onchip IDE controller on the
 	  TI DaVinci SoC
@@ -1107,7 +1111,8 @@ config BLK_DEV_UMC8672
 endif
 
 config BLK_DEV_IDEDMA
-	def_bool BLK_DEV_IDEDMA_PCI || BLK_DEV_IDEDMA_PMAC || BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
+	def_bool BLK_DEV_IDEDMA_SFF || BLK_DEV_IDEDMA_PMAC || \
+		 BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 
 config IDE_ARCH_OBSOLETE_INIT
 	def_bool ALPHA || (ARM && !ARCH_L7200) || BLACKFIN || X86 || IA64 || M32R || MIPS || PARISC || PPC || (SUPERH64 && BLK_DEV_IDEPCI) || SPARC
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index a4bb32883c6b..d0e7b537353e 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -198,7 +198,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq)
 
 EXPORT_SYMBOL_GPL(ide_build_sglist);
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 /**
  *	ide_build_dmatable	-	build IDE DMA table
  *
@@ -316,7 +316,7 @@ void ide_destroy_dmatable (ide_drive_t *drive)
 
 EXPORT_SYMBOL_GPL(ide_destroy_dmatable);
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 /**
  *	config_drive_for_dma	-	attempt to activate IDE DMA
  *	@drive: the drive to place in DMA mode
@@ -424,7 +424,7 @@ void ide_dma_host_set(ide_drive_t *drive, int on)
 }
 
 EXPORT_SYMBOL_GPL(ide_dma_host_set);
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF  */
 
 /**
  *	ide_dma_off_quietly	-	Generic DMA kill
@@ -474,7 +474,7 @@ void ide_dma_on(ide_drive_t *drive)
 	drive->hwif->dma_host_set(drive, 1);
 }
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 /**
  *	ide_dma_setup	-	begin a DMA phase
  *	@drive: target device
@@ -591,7 +591,7 @@ static int __ide_dma_test_irq(ide_drive_t *drive)
 }
 #else
 static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; }
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
 
 int __ide_dma_bad_drive (ide_drive_t *drive)
 {
@@ -840,7 +840,7 @@ void ide_check_dma_crc(ide_drive_t *drive)
 		ide_dma_on(drive);
 }
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 void ide_dma_lost_irq (ide_drive_t *drive)
 {
 	printk("%s: DMA interrupt recovery\n", drive->name);
@@ -1002,4 +1002,4 @@ void ide_setup_dma(ide_hwif_t *hwif, unsigned long base)
 }
 
 EXPORT_SYMBOL_GPL(ide_setup_dma);
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index acec99da832d..40a01c3592df 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -998,8 +998,7 @@ extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *o
 void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, u8 *);
 void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
-/* FIXME: palm_bk3710 uses BLK_DEV_IDEDMA_PCI without BLK_DEV_IDEPCI! */
-#if defined(CONFIG_BLK_DEV_IDEPCI) && defined(CONFIG_BLK_DEV_IDEDMA_PCI)
+#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
 void ide_hwif_setup_dma(ide_hwif_t *, const struct ide_port_info *);
 #else
 static inline void ide_hwif_setup_dma(ide_hwif_t *hwif,
@@ -1146,7 +1145,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *);
 int ide_build_sglist(ide_drive_t *, struct request *);
 void ide_destroy_dmatable(ide_drive_t *);
 
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
 extern int ide_build_dmatable(ide_drive_t *, struct request *);
 extern int ide_release_dma(ide_hwif_t *);
 extern void ide_setup_dma(ide_hwif_t *, unsigned long);
@@ -1157,7 +1156,7 @@ extern void ide_dma_start(ide_drive_t *);
 extern int __ide_dma_end(ide_drive_t *);
 extern void ide_dma_lost_irq(ide_drive_t *);
 extern void ide_dma_timeout(ide_drive_t *);
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
+#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
 
 #else
 static inline int ide_id_dma_bug(ide_drive_t *drive) { return 0; }
@@ -1171,7 +1170,7 @@ static inline int ide_set_dma(ide_drive_t *drive) { return 1; }
 static inline void ide_check_dma_crc(ide_drive_t *drive) { ; }
 #endif /* CONFIG_BLK_DEV_IDEDMA */
 
-#ifndef CONFIG_BLK_DEV_IDEDMA_PCI
+#ifndef CONFIG_BLK_DEV_IDEDMA_SFF
 static inline void ide_release_dma(ide_hwif_t *drive) {;}
 #endif
 
-- 
cgit 


From 395d8ef5bebe547a80737692f9789d2e36da16f2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 11 Feb 2008 00:32:14 +0100
Subject: ide-disk: fix flush requests (take 2)

commit 813a0eb233ee67d7166241a8b389b6a76f2247f9
Author: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date:   Fri Jan 25 22:17:10 2008 +0100

    ide: switch idedisk_prepare_flush() to use REQ_TYPE_ATA_TASKFILE requests

...

broke flush requests.

Allocating IDE command structure on the stack for flush requests is not
a very brilliant idea:

- idedisk_prepare_flush() only prepares the request and it doesn't wait
  for it to be completed

- there are can be multiple flush requests queued in the queue

Fix the problem (per hints from James Bottomley) by:
- dynamically allocating ide_task_t instance using kmalloc(..., GFP_ATOMIC)
- adding new taskfile flag (IDE_TFLAG_DYN)
- calling kfree() in ide_end_drive_command() if IDE_TFLAG_DYN is set
  (while at it rename 'args' to 'task' and fix whitespace damage)

[ This will be fixed properly before 2.6.25 but this bug is rather
  critical and the proper solution requires some more work + testing. ]

Thanks to Sebastian Siewior and Christoph Hellwig for reporting the
problem and testing patches (extra thanks to Sebastian for bisecting
it to the guilty commmit).

Tested-by: Sebastian Siewior <ide-bug@ml.breakpoint.cc>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Tejun Heo <htejun@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 18 +++++++++++-------
 drivers/ide/ide-io.c   | 16 ++++++++++------
 include/linux/ide.h    |  2 ++
 3 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 3c69822507e2..aed8b31ca561 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -590,20 +590,24 @@ static ide_proc_entry_t idedisk_proc[] = {
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
-	ide_task_t task;
+	ide_task_t *task = kmalloc(sizeof(*task), GFP_ATOMIC);
 
-	memset(&task, 0, sizeof(task));
+	/* FIXME: map struct ide_taskfile on rq->cmd[] */
+	BUG_ON(task == NULL);
+
+	memset(task, 0, sizeof(*task));
 	if (ide_id_has_flush_cache_ext(drive->id) &&
 	    (drive->capacity64 >= (1UL << 28)))
-		task.tf.command = WIN_FLUSH_CACHE_EXT;
+		task->tf.command = WIN_FLUSH_CACHE_EXT;
 	else
-		task.tf.command = WIN_FLUSH_CACHE;
-	task.tf_flags	= IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE;
-	task.data_phase	= TASKFILE_NO_DATA;
+		task->tf.command = WIN_FLUSH_CACHE;
+	task->tf_flags	 = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE |
+			   IDE_TFLAG_DYN;
+	task->data_phase = TASKFILE_NO_DATA;
 
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq->cmd_flags |= REQ_SOFTBARRIER;
-	rq->special = &task;
+	rq->special = task;
 }
 
 /*
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index e41383fa3a51..715379605a7b 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -361,17 +361,21 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 	spin_unlock_irqrestore(&ide_lock, flags);
 
 	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
-		ide_task_t *args = (ide_task_t *) rq->special;
+		ide_task_t *task = (ide_task_t *)rq->special;
+
 		if (rq->errors == 0)
-			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
-			
-		if (args) {
-			struct ide_taskfile *tf = &args->tf;
+			rq->errors = !OK_STAT(stat, READY_STAT, BAD_STAT);
+
+		if (task) {
+			struct ide_taskfile *tf = &task->tf;
 
 			tf->error = err;
 			tf->status = stat;
 
-			ide_tf_read(drive, args);
+			ide_tf_read(drive, task);
+
+			if (task->tf_flags & IDE_TFLAG_DYN)
+				kfree(task);
 		}
 	} else if (blk_pm_request(rq)) {
 		struct request_pm_state *pm = rq->data;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 40a01c3592df..23fad89292df 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -906,6 +906,8 @@ enum {
 					  IDE_TFLAG_IN_DEVICE,
 	/* force 16-bit I/O operations */
 	IDE_TFLAG_IO_16BIT		= (1 << 30),
+	/* ide_task_t was allocated using kmalloc() */
+	IDE_TFLAG_DYN			= (1 << 31),
 };
 
 struct ide_taskfile {
-- 
cgit 


From 68a1f2cc8676f22a6fd49f344f99e326eb7f5117 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 7 Feb 2008 16:48:46 +0100
Subject: HID: fix processing of event quirks

The old code (before move) stopped further processing of the
event after it has been already processed by the quirk handler.

The new code didn't propagate the return value properly, and
therefore the processing always proceeded, which was wrong.

This patch fixes it. Pointed out in kernel.org bugzilla #9842

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input-quirks.c | 17 +++++++++--------
 drivers/hid/hid-input.c        |  3 ++-
 include/linux/hid.h            |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input-quirks.c b/drivers/hid/hid-input-quirks.c
index a870ba58faa3..dceadd0c1419 100644
--- a/drivers/hid/hid-input-quirks.c
+++ b/drivers/hid/hid-input-quirks.c
@@ -352,7 +352,7 @@ int hidinput_mapping_quirks(struct hid_usage *usage,
 	return 0;
 }
 
-void hidinput_event_quirks(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value)
+int hidinput_event_quirks(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value)
 {
 	struct input_dev *input;
 
@@ -362,34 +362,34 @@ void hidinput_event_quirks(struct hid_device *hid, struct hid_field *field, stru
 		|| ((hid->quirks & HID_QUIRK_2WHEEL_MOUSE_HACK_7) && (usage->hid == 0x00090007))) {
 		if (value) hid->quirks |=  HID_QUIRK_2WHEEL_MOUSE_HACK_ON;
 		else       hid->quirks &= ~HID_QUIRK_2WHEEL_MOUSE_HACK_ON;
-		return;
+		return 1;
 	}
 
 	if ((hid->quirks & HID_QUIRK_2WHEEL_MOUSE_HACK_B8) &&
 			(usage->type == EV_REL) &&
 			(usage->code == REL_WHEEL)) {
 		hid->delayed_value = value;
-		return;
+		return 1;
 	}
 
 	if ((hid->quirks & HID_QUIRK_2WHEEL_MOUSE_HACK_B8) &&
 			(usage->hid == 0x000100b8)) {
 		input_event(input, EV_REL, value ? REL_HWHEEL : REL_WHEEL, hid->delayed_value);
-		return;
+		return 1;
 	}
 
 	if ((hid->quirks & HID_QUIRK_INVERT_HWHEEL) && (usage->code == REL_HWHEEL)) {
 		input_event(input, usage->type, usage->code, -value);
-		return;
+		return 1;
 	}
 
 	if ((hid->quirks & HID_QUIRK_2WHEEL_MOUSE_HACK_ON) && (usage->code == REL_WHEEL)) {
 		input_event(input, usage->type, REL_HWHEEL, value);
-		return;
+		return 1;
 	}
 
 	if ((hid->quirks & HID_QUIRK_APPLE_HAS_FN) && hidinput_apple_event(hid, input, usage, value))
-		return;
+		return 1;
 
 	/* Handling MS keyboards special buttons */
 	if (hid->quirks & HID_QUIRK_MICROSOFT_KEYS && 
@@ -416,8 +416,9 @@ void hidinput_event_quirks(struct hid_device *hid, struct hid_field *field, stru
 	if (hid->quirks & HID_QUIRK_HWHEEL_WHEEL_INVERT &&
 			usage->type == EV_REL && usage->code == REL_HWHEEL) {
 		input_event(input, usage->type, REL_WHEEL, -value);
-		return;
+		return 1;
 	}
+	return 0;
 }
 
 
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 5325d98b4328..43342785110c 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -854,7 +854,8 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 		return;
 
 	/* handle input events for quirky devices */
-	hidinput_event_quirks(hid, field, usage, value);
+	if (hidinput_event_quirks(hid, field, usage, value))
+		return;
 
 	if (usage->hat_min < usage->hat_max || usage->hat_dir) {
 		int hat_dir = usage->hat_dir;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 3902690647b0..74ff57596eb1 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -528,7 +528,7 @@ int hid_set_field(struct hid_field *, unsigned, __s32);
 int hid_input_report(struct hid_device *, int type, u8 *, int, int);
 int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field);
 int hidinput_mapping_quirks(struct hid_usage *, struct input_dev *, unsigned long **, int *);
-void hidinput_event_quirks(struct hid_device *, struct hid_field *, struct hid_usage *, __s32);
+int hidinput_event_quirks(struct hid_device *, struct hid_field *, struct hid_usage *, __s32);
 int hidinput_apple_event(struct hid_device *, struct input_dev *, struct hid_usage *, __s32);
 void hid_input_field(struct hid_device *hid, struct hid_field *field, __u8 *data, int interrupt);
 void hid_output_report(struct hid_report *report, __u8 *data);
-- 
cgit 


From 33ba509191dd6c6735cc96d2ba411fa311f9a6be Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 5 Feb 2008 00:02:01 +0000
Subject: DM9000: Add platform data to specify external phy

Patch from: Laurent Pinchart <laurentp@cse-semaphore.com>

This patch adds a flag to the DM9000 platform data which, when set,
configures the device to use an external PHY.

Signed-off-by: Laurent Pinchart <laurentp@cse-semaphore.com>
Signed-off-by: Ben Dooks <ben-linuy@fluff.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/dm9000.c   | 6 ++++++
 include/linux/dm9000.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index e4390d917b81..b5e47dfa5529 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -137,6 +137,7 @@ typedef struct board_info {
 	u16 dbug_cnt;
 	u8 io_mode;		/* 0:word, 2:byte */
 	u8 phy_addr;
+	unsigned int flags;
 
 	void (*inblk)(void __iomem *port, void *data, int length);
 	void (*outblk)(void __iomem *port, void *data, int length);
@@ -525,6 +526,8 @@ dm9000_probe(struct platform_device *pdev)
 
 		if (pdata->dumpblk != NULL)
 			db->dumpblk = pdata->dumpblk;
+
+		db->flags = pdata->flags;
 	}
 
 	dm9000_reset(db);
@@ -665,6 +668,9 @@ dm9000_init_dm9000(struct net_device *dev)
 	iow(db, DM9000_GPCR, GPCR_GEP_CNTL);	/* Let GPIO0 output */
 	iow(db, DM9000_GPR, 0);	/* Enable PHY */
 
+	if (db->flags & DM9000_PLATF_EXT_PHY)
+		iow(db, DM9000_NCR, NCR_EXT_PHY);
+
 	/* Program operating register */
 	iow(db, DM9000_TCR, 0);	        /* TX Polling clear */
 	iow(db, DM9000_BPTR, 0x3f);	/* Less 3Kb, 200us */
diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h
index 0008e2ad0c9f..ea530fd1be74 100644
--- a/include/linux/dm9000.h
+++ b/include/linux/dm9000.h
@@ -19,6 +19,7 @@
 #define DM9000_PLATF_8BITONLY	(0x0001)
 #define DM9000_PLATF_16BITONLY	(0x0002)
 #define DM9000_PLATF_32BITONLY	(0x0004)
+#define DM9000_PLATF_EXT_PHY	(0x0008)
 
 /* platfrom data for platfrom device structure's platfrom_data field */
 
-- 
cgit 


From bb44fb70e069412c08e07f494b6b4e985f6331ac Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 5 Feb 2008 00:02:20 +0000
Subject: DM9000: Add platform flag for no attached EEPROM

Allow the platform data to specify to the DM9000 driver
that there is no posibility of an attached EEPROM on the
device, so default all reads to 0xff and ignore any
write operations.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/dm9000.c   | 15 +++++++++++++++
 include/linux/dm9000.h |  1 +
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index d0cd7f945fde..afd2cf509073 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -405,6 +405,9 @@ static int dm9000_get_eeprom(struct net_device *dev,
 	if ((len & 1) != 0 || (offset & 1) != 0)
 		return -EINVAL;
 
+	if (dm->flags & DM9000_PLATF_NO_EEPROM)
+		return -ENOENT;
+
 	ee->magic = DM_EEPROM_MAGIC;
 
 	for (i = 0; i < len; i += 2)
@@ -426,6 +429,9 @@ static int dm9000_set_eeprom(struct net_device *dev,
 	if ((len & 1) != 0 || (offset & 1) != 0)
 		return -EINVAL;
 
+	if (dm->flags & DM9000_PLATF_NO_EEPROM)
+		return -ENOENT;
+
 	if (ee->magic != DM_EEPROM_MAGIC)
 		return -EINVAL;
 
@@ -1100,6 +1106,12 @@ dm9000_read_eeprom(board_info_t *db, int offset, u8 *to)
 {
 	unsigned long flags;
 
+	if (db->flags & DM9000_PLATF_NO_EEPROM) {
+		to[0] = 0xff;
+		to[1] = 0xff;
+		return;
+	}
+
 	mutex_lock(&db->addr_lock);
 
 	spin_lock_irqsave(&db->lock, flags);
@@ -1134,6 +1146,9 @@ dm9000_write_eeprom(board_info_t *db, int offset, u8 *data)
 {
 	unsigned long flags;
 
+	if (db->flags & DM9000_PLATF_NO_EEPROM)
+		return;
+
 	mutex_lock(&db->addr_lock);
 
 	spin_lock_irqsave(&db->lock, flags);
diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h
index ea530fd1be74..a3750462f9e3 100644
--- a/include/linux/dm9000.h
+++ b/include/linux/dm9000.h
@@ -20,6 +20,7 @@
 #define DM9000_PLATF_16BITONLY	(0x0002)
 #define DM9000_PLATF_32BITONLY	(0x0004)
 #define DM9000_PLATF_EXT_PHY	(0x0008)
+#define DM9000_PLATF_NO_EEPROM	(0x0010)
 
 /* platfrom data for platfrom device structure's platfrom_data field */
 
-- 
cgit 


From 1f07e988290fc45932f5028c9e2a862c37a57336 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Mon, 11 Feb 2008 01:35:20 +0100
Subject: Prevent IDE boot ops on NUMA system

Without this patch a Opteron test system here oopses at boot with
current git.

Calling to_pci_dev() on a NULL pointer gives a negative value so the
following NULL pointer check never triggers and then an illegal address
is referenced.  Check the unadjusted original device pointer for NULL
instead.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ide.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 23fad89292df..a3b69c10d667 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1295,7 +1295,7 @@ static inline void ide_dump_identify(u8 *id)
 static inline int hwif_to_node(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	return dev ? pcibus_to_node(dev->bus) : -1;
+	return hwif->dev ? pcibus_to_node(dev->bus) : -1;
 }
 
 static inline ide_drive_t *ide_get_paired_drive(ide_drive_t *drive)
-- 
cgit 


From 31f1de46b90ad360a16e7af3e277d104961df923 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 12 Feb 2008 13:30:22 +0900
Subject: mempolicy: silently restrict nodemask to allowed nodes

Kosaki Motohito noted that "numactl --interleave=all ..." failed in the
presence of memoryless nodes.  This patch attempts to fix that problem.

Some background:

numactl --interleave=all calls set_mempolicy(2) with a fully populated
[out to MAXNUMNODES] nodemask.  set_mempolicy() [in do_set_mempolicy()]
calls contextualize_policy() which requires that the nodemask be a
subset of the current task's mems_allowed; else EINVAL will be returned.

A task's mems_allowed will always be a subset of node_states[N_HIGH_MEMORY]
i.e., nodes with memory.  So, a fully populated nodemask will be
declared invalid if it includes memoryless nodes.

  NOTE:  the same thing will occur when running in a cpuset
         with restricted mem_allowed--for the same reason:
         node mask contains dis-allowed nodes.

mbind(2), on the other hand, just masks off any nodes in the nodemask
that are not included in the caller's mems_allowed.

In each case [mbind() and set_mempolicy()], mpol_check_policy() will
complain [again, resulting in EINVAL] if the nodemask contains any
memoryless nodes.  This is somewhat redundant as mpol_new() will remove
memoryless nodes for interleave policy, as will bind_zonelist()--called
by mpol_new() for BIND policy.

Proposed fix:

1) modify contextualize_policy logic to:
   a) remember whether the incoming node mask is empty.
   b) if not, restrict the nodemask to allowed nodes, as is
      currently done in-line for mbind().  This guarantees
      that the resulting mask includes only nodes with memory.

      NOTE:  this is a [benign, IMO] change in behavior for
             set_mempolicy().  Dis-allowed nodes will be
             silently ignored, rather than returning an error.

   c) fold this code into mpol_check_policy(), replace 2 calls to
      contextualize_policy() to call mpol_check_policy() directly
      and remove contextualize_policy().

2) In existing mpol_check_policy() logic, after "contextualization":
   a) MPOL_DEFAULT:  require that in coming mask "was_empty"
   b) MPOL_{BIND|INTERLEAVE}:  require that contextualized nodemask
      contains at least one node.
   c) add a case for MPOL_PREFERRED:  if in coming was not empty
      and resulting mask IS empty, user specified invalid nodes.
      Return EINVAL.
   c) remove the now redundant check for memoryless nodes

3) remove the now redundant masking of policy nodes for interleave
   policy from mpol_new().

4) Now that mpol_check_policy() contextualizes the nodemask, remove
   the in-line nodes_and() from sys_mbind().  I believe that this
   restores mbind() to the behavior before the memoryless-nodes
   patch series.  E.g., we'll no longer treat an invalid nodemask
   with MPOL_PREFERRED as local allocation.

[ Patch history:

  v1 -> v2:
   - Communicate whether or not incoming node mask was empty to
     mpol_check_policy() for better error checking.
   - As suggested by David Rientjes, remove the now unused
     cpuset_nodes_subset_current_mems_allowed() from cpuset.h

  v2 -> v3:
   - As suggested by Kosaki Motohito, fold the "contextualization"
     of policy nodemask into mpol_check_policy().  Looks a little
     cleaner. ]

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by:  KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by:      KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by:       David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h |  3 ---
 mm/mempolicy.c         | 61 +++++++++++++++++++++++++++++---------------------
 2 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index f8c9a2752f06..0a26be353cb3 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -26,8 +26,6 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
 void cpuset_update_task_memory_state(void);
-#define cpuset_nodes_subset_current_mems_allowed(nodes) \
-		nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
 
 extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
@@ -103,7 +101,6 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
 static inline void cpuset_init_current_mems_allowed(void) {}
 static inline void cpuset_update_task_memory_state(void) {}
-#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
 
 static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83c69f8a64c2..8d246c3b340f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -116,22 +116,51 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 /* Do sanity checking on a policy */
 static int mpol_check_policy(int mode, nodemask_t *nodes)
 {
-	int empty = nodes_empty(*nodes);
+	int was_empty, is_empty;
+
+	if (!nodes)
+		return 0;
+
+	/*
+	 * "Contextualize" the in-coming nodemast for cpusets:
+	 * Remember whether in-coming nodemask was empty,  If not,
+	 * restrict the nodes to the allowed nodes in the cpuset.
+	 * This is guaranteed to be a subset of nodes with memory.
+	 */
+	cpuset_update_task_memory_state();
+	is_empty = was_empty = nodes_empty(*nodes);
+	if (!was_empty) {
+		nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
+		is_empty = nodes_empty(*nodes);	/* after "contextualization" */
+	}
 
 	switch (mode) {
 	case MPOL_DEFAULT:
-		if (!empty)
+		/*
+		 * require caller to specify an empty nodemask
+		 * before "contextualization"
+		 */
+		if (!was_empty)
 			return -EINVAL;
 		break;
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
-		/* Preferred will only use the first bit, but allow
-		   more for now. */
-		if (empty)
+		/*
+		 * require at least 1 valid node after "contextualization"
+		 */
+		if (is_empty)
+			return -EINVAL;
+		break;
+	case MPOL_PREFERRED:
+		/*
+		 * Did caller specify invalid nodes?
+		 * Don't silently accept this as "local allocation".
+		 */
+		if (!was_empty && is_empty)
 			return -EINVAL;
 		break;
 	}
- 	return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL;
+	return 0;
 }
 
 /* Generate a custom zonelist for the BIND policy. */
@@ -188,8 +217,6 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 	switch (mode) {
 	case MPOL_INTERLEAVE:
 		policy->v.nodes = *nodes;
-		nodes_and(policy->v.nodes, policy->v.nodes,
-					node_states[N_HIGH_MEMORY]);
 		if (nodes_weight(policy->v.nodes) == 0) {
 			kmem_cache_free(policy_cache, policy);
 			return ERR_PTR(-EINVAL);
@@ -421,18 +448,6 @@ static int mbind_range(struct vm_area_struct *vma, unsigned long start,
 	return err;
 }
 
-static int contextualize_policy(int mode, nodemask_t *nodes)
-{
-	if (!nodes)
-		return 0;
-
-	cpuset_update_task_memory_state();
-	if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
-		return -EINVAL;
-	return mpol_check_policy(mode, nodes);
-}
-
-
 /*
  * Update task->flags PF_MEMPOLICY bit: set iff non-default
  * mempolicy.  Allows more rapid checking of this (combined perhaps
@@ -468,7 +483,7 @@ static long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
 	struct mempolicy *new;
 
-	if (contextualize_policy(mode, nodes))
+	if (mpol_check_policy(mode, nodes))
 		return -EINVAL;
 	new = mpol_new(mode, nodes);
 	if (IS_ERR(new))
@@ -915,10 +930,6 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 	err = get_nodes(&nodes, nmask, maxnode);
 	if (err)
 		return err;
-#ifdef CONFIG_CPUSETS
-	/* Restrict the nodes to the allowed nodes in the cpuset */
-	nodes_and(nodes, nodes, current->mems_allowed);
-#endif
 	return do_mbind(start, len, mode, &nodes, flags);
 }
 
-- 
cgit 


From d5bd0146f0d61f7dc9904a7cc6d5cb9832034de4 Mon Sep 17 00:00:00 2001
From: Neil Turton <nturton@solarflare.com>
Date: Tue, 12 Feb 2008 23:13:48 -0800
Subject: [NET]: Improve cache line coherency of ingress qdisc

Move the ingress qdisc members of struct net_device from the transmit
cache line to the receive cache line to avoid cache line ping-pong.
These members are only used on the receive path.

Signed-off-by: Neil Turton <nturton@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 047d432bde55..4ffa49dbb66f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -604,6 +604,10 @@ struct net_device
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+	/* ingress path synchronizer */
+	spinlock_t		ingress_lock;
+	struct Qdisc		*qdisc_ingress;
+
 /*
  * Cache line mostly used on queue transmit path (qdisc)
  */
@@ -617,10 +621,6 @@ struct net_device
 	/* Partially transmitted GSO packet. */
 	struct sk_buff		*gso_skb;
 
-	/* ingress path synchronizer */
-	spinlock_t		ingress_lock;
-	struct Qdisc		*qdisc_ingress;
-
 /*
  * One part is mostly used on xmit path (device)
  */
-- 
cgit 


From 9f0c1e560c43327b70998e6c702b2f01321130d9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:39 +0100
Subject: sched: rt-group: interface

Change the rt_ratio interface to rt_runtime_us, to match rt_period_us.
This avoids picking a granularity for the ratio.

Extend the /sys/kernel/uids/<uid>/ interface to allow setting
the group's rt_runtime.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/sched-rt-group.txt |  59 ++++++++++++++++
 include/linux/sched.h            |   7 +-
 kernel/sched.c                   | 141 ++++++++++++++++++++++++++++++---------
 kernel/sched_rt.c                |  53 +++++++--------
 kernel/sysctl.c                  |  32 ++++-----
 kernel/user.c                    |  28 ++++++++
 6 files changed, 242 insertions(+), 78 deletions(-)
 create mode 100644 Documentation/sched-rt-group.txt

(limited to 'include/linux')

diff --git a/Documentation/sched-rt-group.txt b/Documentation/sched-rt-group.txt
new file mode 100644
index 000000000000..1c6332f4543c
--- /dev/null
+++ b/Documentation/sched-rt-group.txt
@@ -0,0 +1,59 @@
+
+
+Real-Time group scheduling.
+
+The problem space:
+
+In order to schedule multiple groups of realtime tasks each group must
+be assigned a fixed portion of the CPU time available. Without a minimum
+guarantee a realtime group can obviously fall short. A fuzzy upper limit
+is of no use since it cannot be relied upon. Which leaves us with just
+the single fixed portion.
+
+CPU time is divided by means of specifying how much time can be spent
+running in a given period. Say a frame fixed realtime renderer must
+deliver 25 frames a second, which yields a period of 0.04s. Now say
+it will also have to play some music and respond to input, leaving it
+with around 80% for the graphics. We can then give this group a runtime
+of 0.8 * 0.04s = 0.032s.
+
+This way the graphics group will have a 0.04s period with a 0.032s runtime
+limit.
+
+Now if the audio thread needs to refill the DMA buffer every 0.005s, but
+needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s
+= 0.00015s.
+
+
+The Interface:
+
+system wide:
+
+/proc/sys/kernel/sched_rt_period_ms
+/proc/sys/kernel/sched_rt_runtime_us
+
+CONFIG_FAIR_USER_SCHED
+
+/sys/kernel/uids/<uid>/cpu_rt_runtime_us
+
+or
+
+CONFIG_FAIR_CGROUP_SCHED
+
+/cgroup/<cgroup>/cpu.rt_runtime_us
+
+[ time is specified in us because the interface is s32; this gives an
+  operating range of ~35m to 1us ]
+
+The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ].
+
+A runtime of -1 specifies runtime == period, ie. no limit.
+
+New groups get the period from /proc/sys/kernel/sched_rt_period_us and
+a runtime of 0.
+
+Settings are constrained to:
+
+   \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
+
+in order to keep the configuration schedulable.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 00e144117326..142eb293f9c4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1541,8 +1541,6 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_rt_period;
-extern unsigned int sysctl_sched_rt_ratio;
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 extern unsigned int sysctl_sched_min_bal_int_shares;
 extern unsigned int sysctl_sched_max_bal_int_shares;
@@ -1552,6 +1550,8 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
 
 extern unsigned int sysctl_sched_compat_yield;
 
@@ -2036,6 +2036,9 @@ extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 extern unsigned long sched_group_shares(struct task_group *tg);
+extern int sched_group_set_rt_runtime(struct task_group *tg,
+				      long rt_runtime_us);
+extern long sched_group_rt_runtime(struct task_group *tg);
 
 #endif
 
diff --git a/kernel/sched.c b/kernel/sched.c
index cecaea67ae9b..85a5fbff2b00 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,7 +176,7 @@ struct task_group {
 	struct sched_rt_entity **rt_se;
 	struct rt_rq **rt_rq;
 
-	unsigned int rt_ratio;
+	u64 rt_runtime;
 
 	/*
 	 * shares assigned to a task group governs how much of cpu bandwidth
@@ -642,19 +642,21 @@ const_debug unsigned int sysctl_sched_features =
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
-const_debug unsigned int sysctl_sched_rt_period = 1000;
+unsigned int sysctl_sched_rt_period = 1000000;
 
-#define SCHED_RT_FRAC_SHIFT	16
-#define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)
+/*
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
+ */
+int sysctl_sched_rt_runtime = 950000;
 
 /*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * single value that denotes runtime == period, ie unlimited time.
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+#define RUNTIME_INF	((u64)~0ULL)
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -7187,7 +7189,8 @@ void __init sched_init(void)
 				&per_cpu(init_cfs_rq, i),
 				&per_cpu(init_sched_entity, i), i, 1);
 
-		init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+		init_task_group.rt_runtime =
+			sysctl_sched_rt_runtime * NSEC_PER_USEC;
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 		init_tg_rt_entry(rq, &init_task_group,
 				&per_cpu(init_rt_rq, i),
@@ -7583,7 +7586,7 @@ struct task_group *sched_create_group(void)
 		goto err;
 
 	tg->shares = NICE_0_LOAD;
-	tg->rt_ratio = 0; /* XXX */
+	tg->rt_runtime = 0;
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
@@ -7785,30 +7788,76 @@ unsigned long sched_group_shares(struct task_group *tg)
 }
 
 /*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
  */
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+	if (runtime == RUNTIME_INF)
+		return 1ULL << 16;
+
+	runtime *= (1ULL << 16);
+	div64_64(runtime, period);
+	return runtime;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
 	struct task_group *tgi;
 	unsigned long total = 0;
+	unsigned long global_ratio =
+		to_ratio(sysctl_sched_rt_period,
+			 sysctl_sched_rt_runtime < 0 ?
+				RUNTIME_INF : sysctl_sched_rt_runtime);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list)
-		total += tgi->rt_ratio;
-	rcu_read_unlock();
+	list_for_each_entry_rcu(tgi, &task_groups, list) {
+		if (tgi == tg)
+			continue;
 
-	if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
-		return -EINVAL;
+		total += to_ratio(period, tgi->rt_runtime);
+	}
+	rcu_read_unlock();
 
-	tg->rt_ratio = rt_ratio;
-	return 0;
+	return total + to_ratio(period, runtime) < global_ratio;
 }
 
-unsigned long sched_group_rt_ratio(struct task_group *tg)
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
-	return tg->rt_ratio;
+	u64 rt_runtime, rt_period;
+	int err = 0;
+
+	rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+	if (rt_runtime_us == -1)
+		rt_runtime = rt_period;
+
+	mutex_lock(&rt_constraints_mutex);
+	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+	if (rt_runtime_us == -1)
+		rt_runtime = RUNTIME_INF;
+	tg->rt_runtime = rt_runtime;
+ unlock:
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
 }
 
+long sched_group_rt_runtime(struct task_group *tg)
+{
+	u64 rt_runtime_us;
+
+	if (tg->rt_runtime == RUNTIME_INF)
+		return -1;
+
+	rt_runtime_us = tg->rt_runtime;
+	do_div(rt_runtime_us, NSEC_PER_USEC);
+	return rt_runtime_us;
+}
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_FAIR_CGROUP_SCHED
@@ -7884,17 +7933,49 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
 	return (u64) tg->shares;
 }
 
-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-		u64 rt_ratio_val)
+static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+				struct file *file,
+				const char __user *userbuf,
+				size_t nbytes, loff_t *unused_ppos)
 {
-	return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+	char buffer[64];
+	int retval = 0;
+	s64 val;
+	char *end;
+
+	if (!nbytes)
+		return -EINVAL;
+	if (nbytes >= sizeof(buffer))
+		return -E2BIG;
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;     /* nul-terminate */
+
+	/* strip newline if necessary */
+	if (nbytes && (buffer[nbytes-1] == '\n'))
+		buffer[nbytes-1] = 0;
+	val = simple_strtoll(buffer, &end, 0);
+	if (*end)
+		return -EINVAL;
+
+	/* Pass to subsystem */
+	retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+	if (!retval)
+		retval = nbytes;
+	return retval;
 }
 
-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	char tmp[64];
+	long val = sched_group_rt_runtime(cgroup_tg(cgrp));
+	int len = sprintf(tmp, "%ld\n", val);
 
-	return (u64) tg->rt_ratio;
+	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
 static struct cftype cpu_files[] = {
@@ -7904,9 +7985,9 @@ static struct cftype cpu_files[] = {
 		.write_uint = cpu_shares_write_uint,
 	},
 	{
-		.name = "rt_ratio",
-		.read_uint = cpu_rt_ratio_read_uint,
-		.write_uint = cpu_rt_ratio_write_uint,
+		.name = "rt_runtime_us",
+		.read = cpu_rt_runtime_read,
+		.write = cpu_rt_runtime_write,
 	},
 };
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8d4269381239..35825b28e429 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -57,12 +57,12 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
 	if (!rt_rq->tg)
-		return SCHED_RT_FRAC;
+		return RUNTIME_INF;
 
-	return rt_rq->tg->rt_ratio;
+	return rt_rq->tg->rt_runtime;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
 static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
-static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
 	}
 }
 
-static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -129,9 +129,12 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
 
 #else
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	return sysctl_sched_rt_ratio;
+	if (sysctl_sched_rt_runtime == -1)
+		return RUNTIME_INF;
+
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -158,11 +161,11 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 	return NULL;
 }
 
-static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 }
 
-static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 }
 
@@ -184,28 +187,24 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 	return rt_task_of(rt_se)->prio;
 }
 
-static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
-	unsigned int rt_ratio = sched_rt_ratio(rt_rq);
-	u64 period, ratio;
+	u64 runtime = sched_rt_runtime(rt_rq);
 
-	if (rt_ratio == SCHED_RT_FRAC)
+	if (runtime == RUNTIME_INF)
 		return 0;
 
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 
-	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
-	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-	if (rt_rq->rt_time > ratio) {
+	if (rt_rq->rt_time > runtime) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
 
 		rq->rt_throttled = 1;
 		rt_rq->rt_throttled = 1;
 
 		if (rt_rq_throttled(rt_rq)) {
-			sched_rt_ratio_dequeue(rt_rq);
+			sched_rt_rq_dequeue(rt_rq);
 			return 1;
 		}
 	}
@@ -219,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq)
 	u64 period;
 
 	while (rq->clock > rq->rt_period_expire) {
-		period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+		period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
 		rq->rt_period_expire += period;
 
 		for_each_leaf_rt_rq(rt_rq, rq) {
-			unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-			u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+			u64 runtime = sched_rt_runtime(rt_rq);
 
-			rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
-			if (rt_rq->rt_throttled) {
+			rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
+			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
-				sched_rt_ratio_enqueue(rt_rq);
+				sched_rt_rq_enqueue(rt_rq);
 			}
 		}
 
@@ -262,12 +260,7 @@ static void update_curr_rt(struct rq *rq)
 	cpuacct_charge(curr, delta_exec);
 
 	rt_rq->rt_time += delta_exec;
-	/*
-	 * might make it a tad more accurate:
-	 *
-	 * update_sched_rt_period(rq);
-	 */
-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (sched_rt_runtime_exceeded(rt_rq))
 		resched_task(curr);
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d41ef6b4cf72..924c674b76ea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_period_ms",
-		.data		= &sysctl_sched_rt_period,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_ratio",
-		.data		= &sysctl_sched_rt_ratio,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 	{
 		.ctl_name       = CTL_UNNUMBERED,
@@ -346,6 +330,22 @@ static struct ctl_table kern_table[] = {
 	},
 #endif
 #endif
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_period_us",
+		.data		= &sysctl_sched_rt_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_runtime_us",
+		.data		= &sysctl_sched_rt_runtime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_compat_yield",
diff --git a/kernel/user.c b/kernel/user.c
index 7d7900c5a1fd..9f6d471bfd03 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -164,9 +164,37 @@ static ssize_t cpu_shares_store(struct kobject *kobj,
 static struct kobj_attribute cpu_share_attr =
 	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
 
+static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   char *buf)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+
+	return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
+}
+
+static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+	unsigned long rt_runtime;
+	int rc;
+
+	sscanf(buf, "%lu", &rt_runtime);
+
+	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
+
+	return (rc ? rc : size);
+}
+
+static struct kobj_attribute cpu_rt_runtime_attr =
+	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
+
 /* default attributes per uid directory */
 static struct attribute *uids_attributes[] = {
 	&cpu_share_attr.attr,
+	&cpu_rt_runtime_attr.attr,
 	NULL
 };
 
-- 
cgit 


From 052f1dc7eb02300b05170ae341ccd03b76207778 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 Feb 2008 15:45:40 +0100
Subject: sched: rt-group: make rt groups scheduling configurable

Make the rt group scheduler compile time configurable.
Keep it experimental for now.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cgroup_subsys.h |   2 +-
 include/linux/sched.h         |  11 ++--
 init/Kconfig                  |  23 +++++--
 kernel/sched.c                | 148 ++++++++++++++++++++++++++++++------------
 kernel/sched_rt.c             |  12 ++--
 kernel/user.c                 |  22 +++++--
 6 files changed, 151 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 228235c5ae53..ac6aad98b607 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -25,7 +25,7 @@ SUBSYS(ns)
 
 /* */
 
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 SUBSYS(cpu_cgroup)
 #endif
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 142eb293f9c4..b9bb313fe1ae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -590,7 +590,7 @@ struct user_struct {
 	struct hlist_node uidhash_node;
 	uid_t uid;
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	struct task_group *tg;
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;
@@ -973,7 +973,7 @@ struct sched_rt_entity {
 	unsigned long timeout;
 	int nr_cpus_allowed;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct sched_rt_entity	*parent;
 	/* rq on which this entity is (to be) queued: */
 	struct rt_rq		*rt_rq;
@@ -2027,19 +2027,22 @@ extern int sched_mc_power_savings, sched_smt_power_savings;
 
 extern void normalize_rt_tasks(void);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
 extern struct task_group init_task_group;
 
 extern struct task_group *sched_create_group(void);
 extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
+#ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 extern unsigned long sched_group_shares(struct task_group *tg);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 extern int sched_group_set_rt_runtime(struct task_group *tg,
 				      long rt_runtime_us);
 extern long sched_group_rt_runtime(struct task_group *tg);
-
+#endif
 #endif
 
 #ifdef CONFIG_TASK_XACCT
diff --git a/init/Kconfig b/init/Kconfig
index 824d48cb67bf..dcef8b55011a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -311,25 +311,36 @@ config CPUSETS
 
 	  Say N if unsure.
 
-config FAIR_GROUP_SCHED
-	bool "Fair group CPU scheduler"
+config GROUP_SCHED
+	bool "Group CPU scheduler"
 	default y
 	help
 	  This feature lets CPU scheduler recognize task groups and control CPU
 	  bandwidth allocation to such task groups.
 
+config FAIR_GROUP_SCHED
+	bool "Group scheduling for SCHED_OTHER"
+	depends on GROUP_SCHED
+	default y
+
+config RT_GROUP_SCHED
+	bool "Group scheduling for SCHED_RR/FIFO"
+	depends on EXPERIMENTAL
+	depends on GROUP_SCHED
+	default n
+
 choice
-	depends on FAIR_GROUP_SCHED
+	depends on GROUP_SCHED
 	prompt "Basis for grouping tasks"
-	default FAIR_USER_SCHED
+	default USER_SCHED
 
-config FAIR_USER_SCHED
+config USER_SCHED
 	bool "user id"
 	help
 	  This option will choose userid as the basis for grouping
 	  tasks, thus providing equal CPU bandwidth to each user.
 
-config FAIR_CGROUP_SCHED
+config CGROUP_SCHED
 	bool "Control groups"
  	depends on CGROUPS
  	help
diff --git a/kernel/sched.c b/kernel/sched.c
index 85a5fbff2b00..5edc549edae8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -155,7 +155,7 @@ struct rt_prio_array {
 	struct list_head queue[MAX_RT_PRIO];
 };
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
 #include <linux/cgroup.h>
 
@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups);
 
 /* task group related information */
 struct task_group {
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 	struct cgroup_subsys_state css;
 #endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 
-	struct sched_rt_entity **rt_se;
-	struct rt_rq **rt_rq;
-
-	u64 rt_runtime;
-
 	/*
 	 * shares assigned to a task group governs how much of cpu bandwidth
 	 * is allocated to the group. The more shares a group has, the more is
@@ -213,24 +210,36 @@ struct task_group {
 	 *
 	 */
 	unsigned long shares;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity **rt_se;
+	struct rt_rq **rt_rq;
+
+	u64 rt_runtime;
+#endif
 
 	struct rcu_head rcu;
 	struct list_head list;
 };
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* Default task group's sched entity on each cpu */
 static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
-static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
-
 static struct sched_entity *init_sched_entity_p[NR_CPUS];
 static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 
 static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
 static struct rt_rq *init_rt_rq_p[NR_CPUS];
+#endif
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -240,6 +249,7 @@ static DEFINE_SPINLOCK(task_group_lock);
 /* doms_cur_mutex serializes access to doms_cur[] array */
 static DEFINE_MUTEX(doms_cur_mutex);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
 /* kernel thread that runs rebalance_shares() periodically */
 static struct task_struct *lb_monitor_task;
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused);
 
 static void set_se_shares(struct sched_entity *se, unsigned long shares);
 
+#ifdef CONFIG_USER_SCHED
+# define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
+#else
+# define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
+#endif
+
+#define MIN_GROUP_SHARES	2
+
+static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+#endif
+
 /* Default task group.
  *	Every task in system belong to this group at bootup.
  */
 struct task_group init_task_group = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	.se	= init_sched_entity_p,
 	.cfs_rq = init_cfs_rq_p,
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	.rt_se	= init_sched_rt_entity_p,
 	.rt_rq	= init_rt_rq_p,
-};
-
-#ifdef CONFIG_FAIR_USER_SCHED
-# define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
-#else
-# define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 #endif
-
-#define MIN_GROUP_SHARES	2
-
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+};
 
 /* return group to which a task belongs */
 static inline struct task_group *task_group(struct task_struct *p)
 {
 	struct task_group *tg;
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	tg = p->user->tg;
-#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+#elif defined(CONFIG_CGROUP_SCHED)
 	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 				struct task_group, css);
 #else
@@ -288,11 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p)
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
 	p->se.parent = task_group(p)->se[cpu];
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
 	p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
 }
 
 static inline void lock_doms_cur(void)
@@ -311,7 +330,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
 static inline void lock_doms_cur(void) { }
 static inline void unlock_doms_cur(void) { }
 
-#endif	/* CONFIG_FAIR_GROUP_SCHED */
+#endif	/* CONFIG_GROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
@@ -351,7 +370,7 @@ struct cfs_rq {
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	int highest_prio; /* highest queued rt task prio */
 #endif
 #ifdef CONFIG_SMP
@@ -361,7 +380,7 @@ struct rt_rq {
 	int rt_throttled;
 	u64 rt_time;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	unsigned long rt_nr_boosted;
 
 	struct rq *rq;
@@ -437,6 +456,8 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
 
@@ -7104,7 +7125,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	/* delimiter for bitsearch: */
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	rt_rq->highest_prio = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
@@ -7115,7 +7136,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	rt_rq->rt_time = 0;
 	rt_rq->rt_throttled = 0;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	rt_rq->rt_nr_boosted = 0;
 	rt_rq->rq = rq;
 #endif
@@ -7139,7 +7160,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
 	se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
 	se->parent = NULL;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
 		struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
 		int cpu, int add)
@@ -7168,7 +7191,7 @@ void __init sched_init(void)
 	init_defrootdomain();
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 	list_add(&init_task_group.list, &task_groups);
 #endif
 
@@ -7189,6 +7212,8 @@ void __init sched_init(void)
 				&per_cpu(init_cfs_rq, i),
 				&per_cpu(init_sched_entity, i), i, 1);
 
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 		init_task_group.rt_runtime =
 			sysctl_sched_rt_runtime * NSEC_PER_USEC;
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
@@ -7381,9 +7406,9 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
 
-#ifdef CONFIG_SMP
+#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
 /*
  * distribute shares of all task groups among their schedulable entities,
  * to reflect load distribution across cpus.
@@ -7539,20 +7564,28 @@ static void free_sched_group(struct task_group *tg)
 	int i;
 
 	for_each_possible_cpu(i) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
 		if (tg->se)
 			kfree(tg->se[i]);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 		if (tg->rt_rq)
 			kfree(tg->rt_rq[i]);
 		if (tg->rt_se)
 			kfree(tg->rt_se[i]);
+#endif
 	}
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	kfree(tg->cfs_rq);
 	kfree(tg->se);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	kfree(tg->rt_rq);
 	kfree(tg->rt_se);
+#endif
 	kfree(tg);
 }
 
@@ -7560,10 +7593,14 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(void)
 {
 	struct task_group *tg;
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct rt_rq *rt_rq;
 	struct sched_rt_entity *rt_se;
+#endif
 	struct rq *rq;
 	unsigned long flags;
 	int i;
@@ -7572,12 +7609,18 @@ struct task_group *sched_create_group(void)
 	if (!tg)
 		return ERR_PTR(-ENOMEM);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->cfs_rq)
 		goto err;
 	tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
 	if (!tg->se)
 		goto err;
+
+	tg->shares = NICE_0_LOAD;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
 	tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->rt_rq)
 		goto err;
@@ -7585,12 +7628,13 @@ struct task_group *sched_create_group(void)
 	if (!tg->rt_se)
 		goto err;
 
-	tg->shares = NICE_0_LOAD;
 	tg->rt_runtime = 0;
+#endif
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
 				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!cfs_rq)
@@ -7601,6 +7645,10 @@ struct task_group *sched_create_group(void)
 		if (!se)
 			goto err;
 
+		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
 		rt_rq = kmalloc_node(sizeof(struct rt_rq),
 				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!rt_rq)
@@ -7611,17 +7659,21 @@ struct task_group *sched_create_group(void)
 		if (!rt_se)
 			goto err;
 
-		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
 		init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
+#endif
 	}
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
+#ifdef CONFIG_FAIR_GROUP_SCHED
 		cfs_rq = tg->cfs_rq[i];
 		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 		rt_rq = tg->rt_rq[i];
 		list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+#endif
 	}
 	list_add_rcu(&tg->list, &task_groups);
 	spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7643,23 +7695,21 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
-	struct cfs_rq *cfs_rq = NULL;
-	struct rt_rq *rt_rq = NULL;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-		rt_rq = tg->rt_rq[i];
-		list_del_rcu(&rt_rq->leaf_rt_rq_list);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		list_del_rcu(&tg->cfs_rq[i]->leaf_cfs_rq_list);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+		list_del_rcu(&tg->rt_rq[i]->leaf_rt_rq_list);
+#endif
 	}
 	list_del_rcu(&tg->list);
 	spin_unlock_irqrestore(&task_group_lock, flags);
 
-	BUG_ON(!cfs_rq);
-
 	/* wait for possible concurrent references to cfs_rqs complete */
 	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
@@ -7699,6 +7749,7 @@ void sched_move_task(struct task_struct *tsk)
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
@@ -7786,7 +7837,9 @@ unsigned long sched_group_shares(struct task_group *tg)
 {
 	return tg->shares;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 /*
  * Ensure that the real time constraints are schedulable.
  */
@@ -7858,9 +7911,10 @@ long sched_group_rt_runtime(struct task_group *tg)
 	do_div(rt_runtime_us, NSEC_PER_USEC);
 	return rt_runtime_us;
 }
-#endif	/* CONFIG_FAIR_GROUP_SCHED */
+#endif
+#endif	/* CONFIG_GROUP_SCHED */
 
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 
 /* return corresponding task_group object of a cgroup */
 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
@@ -7920,6 +7974,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	sched_move_task(tsk);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
 {
@@ -7932,7 +7987,9 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
 
 	return (u64) tg->shares;
 }
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
 				struct file *file,
 				const char __user *userbuf,
@@ -7977,18 +8034,23 @@ static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
 
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
+#endif
 
 static struct cftype cpu_files[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	{
 		.name = "shares",
 		.read_uint = cpu_shares_read_uint,
 		.write_uint = cpu_shares_write_uint,
 	},
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	{
 		.name = "rt_runtime_us",
 		.read = cpu_rt_runtime_read,
 		.write = cpu_rt_runtime_write,
 	},
+#endif
 };
 
 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
@@ -8007,7 +8069,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.early_init	= 1,
 };
 
-#endif	/* CONFIG_FAIR_CGROUP_SCHED */
+#endif	/* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_CPUACCT
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 35825b28e429..f54792b175b2 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -55,7 +55,7 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 	return !list_empty(&rt_se->run_list);
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
@@ -177,7 +177,7 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	struct rt_rq *rt_rq = group_rt_rq(rt_se);
 
 	if (rt_rq)
@@ -269,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
 		rt_rq->highest_prio = rt_se_prio(rt_se);
 #endif
@@ -281,7 +281,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	if (rt_se_boosted(rt_se))
 		rt_rq->rt_nr_boosted++;
 #endif
@@ -293,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_rq->rt_nr_running) {
 		struct rt_prio_array *array;
 
@@ -315,7 +315,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
 	if (rt_se_boosted(rt_se))
 		rt_rq->rt_nr_boosted--;
 
diff --git a/kernel/user.c b/kernel/user.c
index 9f6d471bfd03..7132022a040c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -57,7 +57,7 @@ struct user_struct root_user = {
 	.uid_keyring	= &root_user_keyring,
 	.session_keyring = &root_session_keyring,
 #endif
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 	.tg		= &init_task_group,
 #endif
 };
@@ -90,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
 	return NULL;
 }
 
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
 
 static void sched_destroy_user(struct user_struct *up)
 {
@@ -113,15 +113,15 @@ static void sched_switch_user(struct task_struct *p)
 	sched_move_task(p);
 }
 
-#else	/* CONFIG_FAIR_USER_SCHED */
+#else	/* CONFIG_USER_SCHED */
 
 static void sched_destroy_user(struct user_struct *up) { }
 static int sched_create_user(struct user_struct *up) { return 0; }
 static void sched_switch_user(struct task_struct *p) { }
 
-#endif	/* CONFIG_FAIR_USER_SCHED */
+#endif	/* CONFIG_USER_SCHED */
 
-#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
+#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
 
 static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
 static DEFINE_MUTEX(uids_mutex);
@@ -137,6 +137,7 @@ static inline void uids_mutex_unlock(void)
 }
 
 /* uid directory attributes */
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static ssize_t cpu_shares_show(struct kobject *kobj,
 			       struct kobj_attribute *attr,
 			       char *buf)
@@ -163,7 +164,9 @@ static ssize_t cpu_shares_store(struct kobject *kobj,
 
 static struct kobj_attribute cpu_share_attr =
 	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
+#endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
 static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
 				   struct kobj_attribute *attr,
 				   char *buf)
@@ -190,11 +193,16 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
 
 static struct kobj_attribute cpu_rt_runtime_attr =
 	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
+#endif
 
 /* default attributes per uid directory */
 static struct attribute *uids_attributes[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	&cpu_share_attr.attr,
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
 	&cpu_rt_runtime_attr.attr,
+#endif
 	NULL
 };
 
@@ -297,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
 	schedule_work(&up->work);
 }
 
-#else	/* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
+#else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */
 
 int uids_sysfs_init(void) { return 0; }
 static inline int uids_user_create(struct user_struct *up) { return 0; }
@@ -401,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 		spin_lock_irq(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
-			/* This case is not possible when CONFIG_FAIR_USER_SCHED
+			/* This case is not possible when CONFIG_USER_SCHED
 			 * is defined, since we serialize alloc_uid() using
 			 * uids_mutex. Hence no need to call
 			 * sched_destroy_user() or remove_user_sysfs_dir().
-- 
cgit 


From b3c97528689619fc66569b30bf83d09d9929521a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 13 Feb 2008 15:03:15 -0800
Subject: include/linux: Remove all users of FASTCALL() macro

FASTCALL() is always expanded to empty, remove it.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/aio.h            | 20 ++++++++++----------
 include/linux/buffer_head.h    |  6 +++---
 include/linux/file.h           | 16 ++++++++--------
 include/linux/gfp.h            | 15 +++++++--------
 include/linux/interrupt.h      |  8 ++++----
 include/linux/mm.h             |  4 ++--
 include/linux/mutex-debug.h    |  2 +-
 include/linux/namei.h          |  6 +++---
 include/linux/netdevice.h      |  2 +-
 include/linux/pagemap.h        | 10 +++++-----
 include/linux/pid.h            | 21 ++++++++++-----------
 include/linux/rwsem-spinlock.h | 16 ++++++++--------
 include/linux/sched.h          | 14 +++++++-------
 include/linux/swap.h           |  8 ++++----
 include/linux/wait.h           | 34 ++++++++++++++++------------------
 include/linux/workqueue.h      | 13 ++++++-------
 16 files changed, 95 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/aio.h b/include/linux/aio.h
index 7ef8de662001..a9931e2e5624 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -206,21 +206,21 @@ struct kioctx {
 /* prototypes */
 extern unsigned aio_max_size;
 
-extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb));
-extern int FASTCALL(aio_put_req(struct kiocb *iocb));
-extern void FASTCALL(kick_iocb(struct kiocb *iocb));
-extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
-extern void FASTCALL(__put_ioctx(struct kioctx *ctx));
+extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
+extern int aio_put_req(struct kiocb *iocb);
+extern void kick_iocb(struct kiocb *iocb);
+extern int aio_complete(struct kiocb *iocb, long res, long res2);
+extern void __put_ioctx(struct kioctx *ctx);
 struct mm_struct;
-extern void FASTCALL(exit_aio(struct mm_struct *mm));
+extern void exit_aio(struct mm_struct *mm);
 extern struct kioctx *lookup_ioctx(unsigned long ctx_id);
-extern int FASTCALL(io_submit_one(struct kioctx *ctx,
-			struct iocb __user *user_iocb, struct iocb *iocb));
+extern int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+			 struct iocb *iocb);
 
 /* semi private, but used by the 32bit emulations: */
 struct kioctx *lookup_ioctx(unsigned long ctx_id);
-int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-				  struct iocb *iocb));
+int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+		  struct iocb *iocb);
 
 #define get_ioctx(kioctx) do {						\
 	BUG_ON(atomic_read(&(kioctx)->users) <= 0);			\
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e98801f06dcc..932eb02a2753 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -144,7 +144,7 @@ BUFFER_FNS(Unwritten, unwritten)
  * Declarations
  */
 
-void FASTCALL(mark_buffer_dirty(struct buffer_head *bh));
+void mark_buffer_dirty(struct buffer_head *bh);
 void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset);
@@ -185,8 +185,8 @@ struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size
 void invalidate_bh_lrus(void);
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
-void FASTCALL(unlock_buffer(struct buffer_head *bh));
-void FASTCALL(__lock_buffer(struct buffer_head *bh));
+void unlock_buffer(struct buffer_head *bh);
+void __lock_buffer(struct buffer_head *bh);
 void ll_rw_block(int, int, struct buffer_head * bh[]);
 int sync_dirty_buffer(struct buffer_head *bh);
 int submit_bh(int, struct buffer_head *);
diff --git a/include/linux/file.h b/include/linux/file.h
index 56023c74e9fd..7239baac81a9 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -59,8 +59,8 @@ struct files_struct {
 
 extern struct kmem_cache *filp_cachep;
 
-extern void FASTCALL(__fput(struct file *));
-extern void FASTCALL(fput(struct file *));
+extern void __fput(struct file *);
+extern void fput(struct file *);
 
 struct file_operations;
 struct vfsmount;
@@ -77,13 +77,13 @@ static inline void fput_light(struct file *file, int fput_needed)
 		fput(file);
 }
 
-extern struct file * FASTCALL(fget(unsigned int fd));
-extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed));
-extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag));
+extern struct file *fget(unsigned int fd);
+extern struct file *fget_light(unsigned int fd, int *fput_needed);
+extern void set_close_on_exec(unsigned int fd, int flag);
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern int get_unused_fd_flags(int flags);
-extern void FASTCALL(put_unused_fd(unsigned int fd));
+extern void put_unused_fd(unsigned int fd);
 struct kmem_cache;
 
 extern int expand_files(struct files_struct *, int nr);
@@ -110,12 +110,12 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in
  */
 #define fcheck(fd)	fcheck_files(current->files, fd)
 
-extern void FASTCALL(fd_install(unsigned int fd, struct file * file));
+extern void fd_install(unsigned int fd, struct file *file);
 
 struct task_struct;
 
 struct files_struct *get_files_struct(struct task_struct *);
-void FASTCALL(put_files_struct(struct files_struct *fs));
+void put_files_struct(struct files_struct *fs);
 void reset_files_struct(struct task_struct *, struct files_struct *);
 
 extern struct kmem_cache *files_cachep;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0c6ce515185d..164be9da3c1b 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -172,8 +172,7 @@ static inline void arch_free_page(struct page *page, int order) { }
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
-extern struct page *
-FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *));
+extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);
 
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
@@ -209,8 +208,8 @@ extern struct page *alloc_page_vma(gfp_t gfp_mask,
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
-extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order));
-extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask));
+extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
+extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask),0)
@@ -218,10 +217,10 @@ extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask));
 #define __get_dma_pages(gfp_mask, order) \
 		__get_free_pages((gfp_mask) | GFP_DMA,(order))
 
-extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
-extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
-extern void FASTCALL(free_hot_page(struct page *page));
-extern void FASTCALL(free_cold_page(struct page *page));
+extern void __free_pages(struct page *page, unsigned int order);
+extern void free_pages(unsigned long addr, unsigned int order);
+extern void free_hot_page(struct page *page);
+extern void free_cold_page(struct page *page);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr),0)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index dea7598aeff4..f8ab4ce70564 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -273,8 +273,8 @@ asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
 extern void softirq_init(void);
 #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
-extern void FASTCALL(raise_softirq_irqoff(unsigned int nr));
-extern void FASTCALL(raise_softirq(unsigned int nr));
+extern void raise_softirq_irqoff(unsigned int nr);
+extern void raise_softirq(unsigned int nr);
 
 
 /* Tasklets --- multithreaded analogue of BHs.
@@ -341,7 +341,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
 #define tasklet_unlock(t) do { } while (0)
 #endif
 
-extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t));
+extern void __tasklet_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_schedule(struct tasklet_struct *t)
 {
@@ -349,7 +349,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
 		__tasklet_schedule(t);
 }
 
-extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t));
+extern void __tasklet_hi_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_hi_schedule(struct tasklet_struct *t)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e8abb3814209..26c7124b841a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -786,7 +786,7 @@ int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
-int FASTCALL(set_page_dirty(struct page *page));
+int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
@@ -829,7 +829,7 @@ extern void unregister_shrinker(struct shrinker *);
 
 int vma_wants_writenotify(struct vm_area_struct *vma);
 
-extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
+extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl);
 
 #ifdef __PAGETABLE_PUD_FOLDED
 static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
index 2537285e1064..731d77d6e155 100644
--- a/include/linux/mutex-debug.h
+++ b/include/linux/mutex-debug.h
@@ -18,6 +18,6 @@ do {									\
 	__mutex_init((mutex), #mutex, &__key);				\
 } while (0)
 
-extern void FASTCALL(mutex_destroy(struct mutex *lock));
+extern void mutex_destroy(struct mutex *lock);
 
 #endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4cb4f8d2f78d..c13e411491f4 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -62,13 +62,13 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_ACCESS		(0x0400)
 #define LOOKUP_CHDIR		(0x0800)
 
-extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
-extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
+extern int __user_walk(const char __user *, unsigned, struct nameidata *);
+extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *);
 #define user_path_walk(name,nd) \
 	__user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd)
 #define user_path_walk_link(name,nd) \
 	__user_walk_fd(AT_FDCWD, name, 0, nd)
-extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
+extern int path_lookup(const char *, unsigned, struct nameidata *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct nameidata *);
 extern void path_release(struct nameidata *);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 047d432bde55..7128a02f1d37 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -322,7 +322,7 @@ enum
 	NAPI_STATE_DISABLE,	/* Disable pending */
 };
 
-extern void FASTCALL(__napi_schedule(struct napi_struct *n));
+extern void __napi_schedule(struct napi_struct *n);
 
 static inline int napi_disable_pending(struct napi_struct *n)
 {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4b62a105622b..d2fca802f809 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -156,10 +156,10 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 }
 
-extern void FASTCALL(__lock_page(struct page *page));
-extern int FASTCALL(__lock_page_killable(struct page *page));
-extern void FASTCALL(__lock_page_nosync(struct page *page));
-extern void FASTCALL(unlock_page(struct page *page));
+extern void __lock_page(struct page *page);
+extern int __lock_page_killable(struct page *page);
+extern void __lock_page_nosync(struct page *page);
+extern void unlock_page(struct page *page);
 
 /*
  * lock_page may only be called if we have the page's inode pinned.
@@ -199,7 +199,7 @@ static inline void lock_page_nosync(struct page *page)
  * This is exported only for wait_on_page_locked/wait_on_page_writeback.
  * Never use this directly!
  */
-extern void FASTCALL(wait_on_page_bit(struct page *page, int bit_nr));
+extern void wait_on_page_bit(struct page *page, int bit_nr);
 
 /* 
  * Wait for a page to be unlocked.
diff --git a/include/linux/pid.h b/include/linux/pid.h
index f84d532b5d23..c7980810eb09 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -79,10 +79,9 @@ static inline struct pid *get_pid(struct pid *pid)
 	return pid;
 }
 
-extern void FASTCALL(put_pid(struct pid *pid));
-extern struct task_struct *FASTCALL(pid_task(struct pid *pid, enum pid_type));
-extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid,
-						enum pid_type));
+extern void put_pid(struct pid *pid);
+extern struct task_struct *pid_task(struct pid *pid, enum pid_type);
+extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type);
 
 extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
 
@@ -90,11 +89,11 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
  * attach_pid() and detach_pid() must be called with the tasklist_lock
  * write-held.
  */
-extern int FASTCALL(attach_pid(struct task_struct *task,
-				enum pid_type type, struct pid *pid));
-extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
-extern void FASTCALL(transfer_pid(struct task_struct *old,
-				  struct task_struct *new, enum pid_type));
+extern int attach_pid(struct task_struct *task, enum pid_type type,
+		      struct pid *pid);
+extern void detach_pid(struct task_struct *task, enum pid_type);
+extern void transfer_pid(struct task_struct *old, struct task_struct *new,
+			 enum pid_type);
 
 struct pid_namespace;
 extern struct pid_namespace init_pid_ns;
@@ -109,7 +108,7 @@ extern struct pid_namespace init_pid_ns;
  *
  * see also find_task_by_pid() set in include/linux/sched.h
  */
-extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns));
+extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
 extern struct pid *find_vpid(int nr);
 extern struct pid *find_pid(int nr);
 
@@ -121,7 +120,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 int next_pidmap(struct pid_namespace *pid_ns, int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
-extern void FASTCALL(free_pid(struct pid *pid));
+extern void free_pid(struct pid *pid);
 
 /*
  * the helpers to get the pid's id seen from different namespaces
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index 813cee13da0d..6c3c0f6c261f 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -60,14 +60,14 @@ do {								\
 	__init_rwsem((sem), #sem, &__key);			\
 } while (0)
 
-extern void FASTCALL(__down_read(struct rw_semaphore *sem));
-extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem));
-extern void FASTCALL(__down_write(struct rw_semaphore *sem));
-extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass));
-extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem));
-extern void FASTCALL(__up_read(struct rw_semaphore *sem));
-extern void FASTCALL(__up_write(struct rw_semaphore *sem));
-extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem));
+extern void __down_read(struct rw_semaphore *sem);
+extern int __down_read_trylock(struct rw_semaphore *sem);
+extern void __down_write(struct rw_semaphore *sem);
+extern void __down_write_nested(struct rw_semaphore *sem, int subclass);
+extern int __down_write_trylock(struct rw_semaphore *sem);
+extern void __up_read(struct rw_semaphore *sem);
+extern void __up_write(struct rw_semaphore *sem);
+extern void __downgrade_write(struct rw_semaphore *sem);
 
 static inline int rwsem_is_locked(struct rw_semaphore *sem)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b9bb313fe1ae..e217d188a102 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -323,7 +323,7 @@ extern char __sched_text_start[], __sched_text_end[];
 extern int in_sched_functions(unsigned long addr);
 
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
-extern signed long FASTCALL(schedule_timeout(signed long timeout));
+extern signed long schedule_timeout(signed long timeout);
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
@@ -1648,10 +1648,10 @@ extern void release_uids(struct user_namespace *ns);
 
 extern void do_timer(unsigned long ticks);
 
-extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
-extern int FASTCALL(wake_up_process(struct task_struct * tsk));
-extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
-						unsigned long clone_flags));
+extern int wake_up_state(struct task_struct *tsk, unsigned int state);
+extern int wake_up_process(struct task_struct *tsk);
+extern void wake_up_new_task(struct task_struct *tsk,
+				unsigned long clone_flags);
 #ifdef CONFIG_SMP
  extern void kick_process(struct task_struct *tsk);
 #else
@@ -1741,7 +1741,7 @@ static inline int sas_ss_flags(unsigned long sp)
 extern struct mm_struct * mm_alloc(void);
 
 /* mmdrop drops the mm and the page tables */
-extern void FASTCALL(__mmdrop(struct mm_struct *));
+extern void __mmdrop(struct mm_struct *);
 static inline void mmdrop(struct mm_struct * mm)
 {
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
@@ -1925,7 +1925,7 @@ static inline int signal_pending(struct task_struct *p)
 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
 }
 
-extern int FASTCALL(__fatal_signal_pending(struct task_struct *p));
+extern int __fatal_signal_pending(struct task_struct *p);
 
 static inline int fatal_signal_pending(struct task_struct *p)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3ca5c4bd6d3f..878459ae0454 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -171,10 +171,10 @@ extern unsigned int nr_free_pagecache_pages(void);
 
 
 /* linux/mm/swap.c */
-extern void FASTCALL(lru_cache_add(struct page *));
-extern void FASTCALL(lru_cache_add_active(struct page *));
-extern void FASTCALL(activate_page(struct page *));
-extern void FASTCALL(mark_page_accessed(struct page *));
+extern void lru_cache_add(struct page *);
+extern void lru_cache_add_active(struct page *);
+extern void activate_page(struct page *);
+extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern int lru_add_drain_all(void);
 extern int rotate_reclaimable_page(struct page *page);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 33a2aa9e02f2..0081147a9fe8 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -117,9 +117,9 @@ static inline int waitqueue_active(wait_queue_head_t *q)
  */
 #define is_sync_wait(wait)	(!(wait) || ((wait)->private))
 
-extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
-extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
-extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
+extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
+extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
+extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
 
 static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
 {
@@ -141,16 +141,16 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
 	list_del(&old->task_list);
 }
 
-void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
-extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
-extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
-void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int));
-int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned));
-int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned));
-void FASTCALL(wake_up_bit(void *, int));
-int FASTCALL(out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned));
-int FASTCALL(out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned));
-wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int));
+void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
+extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
+void __wake_up_bit(wait_queue_head_t *, void *, int);
+int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+void wake_up_bit(void *, int);
+int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned);
+int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned);
+wait_queue_head_t *bit_waitqueue(void *, int);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -437,11 +437,9 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
-void FASTCALL(prepare_to_wait(wait_queue_head_t *q,
-				wait_queue_t *wait, int state));
-void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q,
-				wait_queue_t *wait, int state));
-void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
+void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 7f28c32d9aca..542526c6e8ef 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -178,18 +178,17 @@ __create_workqueue_key(const char *name, int singlethread,
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
-extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work));
-extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq,
-			struct delayed_work *work, unsigned long delay));
+extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
+extern int queue_delayed_work(struct workqueue_struct *wq,
+			struct delayed_work *work, unsigned long delay);
 extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 
-extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
+extern void flush_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
-extern int FASTCALL(schedule_work(struct work_struct *work));
-extern int FASTCALL(schedule_delayed_work(struct delayed_work *work,
-					unsigned long delay));
+extern int schedule_work(struct work_struct *work);
+extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
 					unsigned long delay);
 extern int schedule_on_each_cpu(work_func_t func);
-- 
cgit 


From 21534301ea1801783bd88fba2a2e617ee4d2bd28 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 13 Feb 2008 15:03:17 -0800
Subject: Final removal of FASTCALL()/fastcall

All users are gone, remove definitions and comments referring
to them.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/drm/i830_dma.c   | 2 +-
 include/asm-mn10300/highmem.h | 4 ++--
 include/asm-mn10300/linkage.h | 2 --
 include/linux/irq.h           | 1 -
 include/linux/linkage.h       | 5 -----
 5 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c
index 379cbdad4921..9df08105f4f3 100644
--- a/drivers/char/drm/i830_dma.c
+++ b/drivers/char/drm/i830_dma.c
@@ -36,7 +36,7 @@
 #include "i830_drm.h"
 #include "i830_drv.h"
 #include <linux/interrupt.h>	/* For task queue support */
-#include <linux/pagemap.h>	/* For FASTCALL on unlock_page() */
+#include <linux/pagemap.h>
 #include <linux/delay.h>
 #include <asm/uaccess.h>
 
diff --git a/include/asm-mn10300/highmem.h b/include/asm-mn10300/highmem.h
index 383c0c42982e..5256854c0453 100644
--- a/include/asm-mn10300/highmem.h
+++ b/include/asm-mn10300/highmem.h
@@ -42,8 +42,8 @@ extern void __init kmap_init(void);
 #define PKMAP_NR(virt)  ((virt - PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
-extern unsigned long __fastcall kmap_high(struct page *page);
-extern void __fastcall kunmap_high(struct page *page);
+extern unsigned long kmap_high(struct page *page);
+extern void kunmap_high(struct page *page);
 
 static inline unsigned long kmap(struct page *page)
 {
diff --git a/include/asm-mn10300/linkage.h b/include/asm-mn10300/linkage.h
index 29a32e467523..dda3002a5dfa 100644
--- a/include/asm-mn10300/linkage.h
+++ b/include/asm-mn10300/linkage.h
@@ -13,8 +13,6 @@
 
 /* don't override anything */
 #define asmlinkage
-#define FASTCALL(x) x
-#define fastcall
 
 #define __ALIGN		.align 4,0xcb
 #define __ALIGN_STR	".align 4,0xcb"
diff --git a/include/linux/irq.h b/include/linux/irq.h
index bfd9efb5cb49..176e5e790a44 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -285,7 +285,6 @@ extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
 
 /*
  * Monolithic do_IRQ implementation.
- * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
  */
 #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 extern unsigned int __do_IRQ(unsigned int irq);
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 3faf599ea58e..0592936344c4 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -73,9 +73,4 @@
 #define ATTRIB_NORET  __attribute__((noreturn))
 #define NORET_AND     noreturn,
 
-#ifndef FASTCALL
-#define FASTCALL(x)	x
-#define fastcall
-#endif
-
 #endif
-- 
cgit 


From 2695a14d315c014474ccadbaed40b0169b00cb5b Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Wed, 13 Feb 2008 15:03:17 -0800
Subject: SC26XX: missing PORT define in serial_core.h

When submitting the driver for inclusion to 2.6.25 I've missed the change to
serial_core.h. This patch fixes this.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial_core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 1a0b6cf83ff1..289942fc6655 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -149,6 +149,8 @@
 /* Freescale ColdFire */
 #define PORT_MCF	78
 
+#define PORT_SC26XX	79
+
 
 /* MN10300 on-chip UART numbers */
 #define PORT_MN10300		80
-- 
cgit 


From 064d9efe947542097be669581f82d6b097e81d1a Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 13 Feb 2008 15:03:19 -0800
Subject: hugetlb: fix overcommit locking

proc_doulongvec_minmax() calls copy_to_user()/copy_from_user(), so we can't
hold hugetlb_lock over the call.  Use a dummy variable to store the sysctl
result, like in hugetlb_sysctl_handler(), then grab the lock to update
nr_overcommit_huge_pages.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Reported-by: Miles Lane <miles.lane@gmail.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 2 +-
 kernel/sysctl.c         | 4 ++--
 mm/hugetlb.c            | 6 ++++--
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7ca198b379af..addca4cd4f11 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,8 +33,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
 extern unsigned long max_huge_pages;
+extern unsigned long sysctl_overcommit_huge_pages;
 extern unsigned long hugepages_treat_as_movable;
-extern unsigned long nr_overcommit_huge_pages;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 924c674b76ea..8b7e95411795 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -978,8 +978,8 @@ static struct ctl_table vm_table[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nr_overcommit_hugepages",
-		.data		= &nr_overcommit_huge_pages,
-		.maxlen		= sizeof(nr_overcommit_huge_pages),
+		.data		= &sysctl_overcommit_huge_pages,
+		.maxlen		= sizeof(sysctl_overcommit_huge_pages),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_overcommit_handler,
 	},
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d9a380312467..cb1b3a7ecdfc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,14 +24,15 @@
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
 static unsigned long surplus_huge_pages;
+static unsigned long nr_overcommit_huge_pages;
 unsigned long max_huge_pages;
+unsigned long sysctl_overcommit_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
 static unsigned int free_huge_pages_node[MAX_NUMNODES];
 static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
-unsigned long nr_overcommit_huge_pages;
 static int hugetlb_next_nid;
 
 /*
@@ -609,8 +610,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 			struct file *file, void __user *buffer,
 			size_t *length, loff_t *ppos)
 {
-	spin_lock(&hugetlb_lock);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+	spin_lock(&hugetlb_lock);
+	nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
 	spin_unlock(&hugetlb_lock);
 	return 0;
 }
-- 
cgit 


From bc2cda1ebd4430f55deb60f0193a3e3b835499a2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 13 Feb 2008 15:03:25 -0800
Subject: docbook: make a networking book and fix a few errors

Move networking (core and drivers) docbook to its own networking book.
Fix a few kernel-doc errors in header and source files.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DocBook/Makefile        |   2 +-
 Documentation/DocBook/kernel-api.tmpl |  65 ---------------------
 Documentation/DocBook/networking.tmpl | 106 ++++++++++++++++++++++++++++++++++
 include/linux/etherdevice.h           |   3 +-
 net/core/dev.c                        |   3 +-
 net/core/skbuff.c                     |   4 +-
 6 files changed, 111 insertions(+), 72 deletions(-)
 create mode 100644 Documentation/DocBook/networking.tmpl

(limited to 'include/linux')

diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 6a0ad4715e9f..300e1707893f 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -8,7 +8,7 @@
 
 DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
-	    procfs-guide.xml writing_usb_driver.xml \
+	    procfs-guide.xml writing_usb_driver.xml networking.xml \
 	    kernel-api.xml filesystems.xml lsm.xml usb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 6c0e5f018615..7e054c9124e6 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -204,71 +204,6 @@ X!Ilib/string.c
      </sect1>
   </chapter>
 
-  <chapter id="netcore">
-     <title>Linux Networking</title>
-     <sect1><title>Networking Base Types</title>
-!Iinclude/linux/net.h
-     </sect1>
-     <sect1><title>Socket Buffer Functions</title>
-!Iinclude/linux/skbuff.h
-!Iinclude/net/sock.h
-!Enet/socket.c
-!Enet/core/skbuff.c
-!Enet/core/sock.c
-!Enet/core/datagram.c
-!Enet/core/stream.c
-     </sect1>
-     <sect1><title>Socket Filter</title>
-!Enet/core/filter.c
-     </sect1>
-     <sect1><title>Generic Network Statistics</title>
-!Iinclude/linux/gen_stats.h
-!Enet/core/gen_stats.c
-!Enet/core/gen_estimator.c
-     </sect1>
-     <sect1><title>SUN RPC subsystem</title>
-<!-- The !D functionality is not perfect, garbage has to be protected by comments
-!Dnet/sunrpc/sunrpc_syms.c
--->
-!Enet/sunrpc/xdr.c
-!Enet/sunrpc/svc_xprt.c
-!Enet/sunrpc/xprt.c
-!Enet/sunrpc/sched.c
-!Enet/sunrpc/socklib.c
-!Enet/sunrpc/stats.c
-!Enet/sunrpc/rpc_pipe.c
-!Enet/sunrpc/rpcb_clnt.c
-!Enet/sunrpc/clnt.c
-     </sect1>
-  </chapter>
-
-  <chapter id="netdev">
-     <title>Network device support</title>
-     <sect1><title>Driver Support</title>
-!Enet/core/dev.c
-!Enet/ethernet/eth.c
-!Enet/sched/sch_generic.c
-!Iinclude/linux/etherdevice.h
-!Iinclude/linux/netdevice.h
-     </sect1>
-     <sect1><title>PHY Support</title>
-!Edrivers/net/phy/phy.c
-!Idrivers/net/phy/phy.c
-!Edrivers/net/phy/phy_device.c
-!Idrivers/net/phy/phy_device.c
-!Edrivers/net/phy/mdio_bus.c
-!Idrivers/net/phy/mdio_bus.c
-     </sect1>
-<!-- FIXME: Removed for now since no structured comments in source
-     <sect1><title>Wireless</title>
-X!Enet/core/wireless.c
-     </sect1>
--->
-     <sect1><title>Synchronous PPP</title>
-!Edrivers/net/wan/syncppp.c
-     </sect1>
-  </chapter>
-
   <chapter id="modload">
      <title>Module Support</title>
      <sect1><title>Module Loading</title>
diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl
new file mode 100644
index 000000000000..f24f9e85e4ae
--- /dev/null
+++ b/Documentation/DocBook/networking.tmpl
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxNetworking">
+ <bookinfo>
+  <title>Linux Networking and Network Devices APIs</title>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="netcore">
+     <title>Linux Networking</title>
+     <sect1><title>Networking Base Types</title>
+!Iinclude/linux/net.h
+     </sect1>
+     <sect1><title>Socket Buffer Functions</title>
+!Iinclude/linux/skbuff.h
+!Iinclude/net/sock.h
+!Enet/socket.c
+!Enet/core/skbuff.c
+!Enet/core/sock.c
+!Enet/core/datagram.c
+!Enet/core/stream.c
+     </sect1>
+     <sect1><title>Socket Filter</title>
+!Enet/core/filter.c
+     </sect1>
+     <sect1><title>Generic Network Statistics</title>
+!Iinclude/linux/gen_stats.h
+!Enet/core/gen_stats.c
+!Enet/core/gen_estimator.c
+     </sect1>
+     <sect1><title>SUN RPC subsystem</title>
+<!-- The !D functionality is not perfect, garbage has to be protected by comments
+!Dnet/sunrpc/sunrpc_syms.c
+-->
+!Enet/sunrpc/xdr.c
+!Enet/sunrpc/svc_xprt.c
+!Enet/sunrpc/xprt.c
+!Enet/sunrpc/sched.c
+!Enet/sunrpc/socklib.c
+!Enet/sunrpc/stats.c
+!Enet/sunrpc/rpc_pipe.c
+!Enet/sunrpc/rpcb_clnt.c
+!Enet/sunrpc/clnt.c
+     </sect1>
+  </chapter>
+
+  <chapter id="netdev">
+     <title>Network device support</title>
+     <sect1><title>Driver Support</title>
+!Enet/core/dev.c
+!Enet/ethernet/eth.c
+!Enet/sched/sch_generic.c
+!Iinclude/linux/etherdevice.h
+!Iinclude/linux/netdevice.h
+     </sect1>
+     <sect1><title>PHY Support</title>
+!Edrivers/net/phy/phy.c
+!Idrivers/net/phy/phy.c
+!Edrivers/net/phy/phy_device.c
+!Idrivers/net/phy/phy_device.c
+!Edrivers/net/phy/mdio_bus.c
+!Idrivers/net/phy/mdio_bus.c
+     </sect1>
+<!-- FIXME: Removed for now since no structured comments in source
+     <sect1><title>Wireless</title>
+X!Enet/core/wireless.c
+     </sect1>
+-->
+     <sect1><title>Synchronous PPP</title>
+!Edrivers/net/wan/syncppp.c
+     </sect1>
+  </chapter>
+
+</book>
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index b7558ec81ed5..25d62e6e3290 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -70,8 +70,7 @@ static inline int is_multicast_ether_addr(const u8 *addr)
 }
 
 /**
- * is_local_ether_addr - Determine if the Ethernet address is locally-assigned
- * one (IEEE 802).
+ * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802).
  * @addr: Pointer to a six-byte array containing the Ethernet address
  *
  * Return true if the address is a local address.
diff --git a/net/core/dev.c b/net/core/dev.c
index b2f6cb5e0f72..b3e19ae57f95 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3038,8 +3038,7 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
 EXPORT_SYMBOL(dev_unicast_sync);
 
 /**
- *	dev_unicast_unsync - Remove synchronized addresses from the destination
- *			     device
+ *	dev_unicast_unsync - Remove synchronized addresses from the destination device
  *	@to: destination device
  *	@from: source device
  *
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e354221ec23..cfc07dac636c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1907,11 +1907,11 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
  * of bytes already consumed and the next call to
  * skb_seq_read() will return the remaining part of the block.
  *
- * Note: The size of each block of data returned can be arbitary,
+ * Note 1: The size of each block of data returned can be arbitary,
  *       this limitation is the cost for zerocopy seqeuental
  *       reads of potentially non linear data.
  *
- * Note: Fragment lists within fragments are not implemented
+ * Note 2: Fragment lists within fragments are not implemented
  *       at the moment, state->root_skb could be replaced with
  *       a stack for this purpose.
  */
-- 
cgit 


From 91d35dd93e14c34539a8005183ea500f25caad02 Mon Sep 17 00:00:00 2001
From: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Date: Wed, 13 Feb 2008 15:03:26 -0800
Subject: moduleparam: fix alpha, ia64 and ppc64 compile failures

On alpha, ia64 and ppc64 only relocations to local data can go into
read-only sections. The vast majority of module parameters use the global
generic param_set_*/param_get_* functions, so the 'const' attribute for
struct kernel_param is not only useless, but it also causes compile
failures due to 'section type conflict' in those rare cases where
param_set/get are local functions.

This fixes http://bugzilla.kernel.org/show_bug.cgi?id=8964

Signed-off-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Richard Henderson <rth@twiddle.net>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Adrian Bunk <bunk@stusta.de>
Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/moduleparam.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 8126e55c5bdc..ec624381c844 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -62,6 +62,16 @@ struct kparam_array
 	void *elem;
 };
 
+/* On alpha, ia64 and ppc64 relocations to global data cannot go into
+   read-only sections (which is part of respective UNIX ABI on these
+   platforms). So 'const' makes no sense and even causes compile failures
+   with some compilers. */
+#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64)
+#define __moduleparam_const
+#else
+#define __moduleparam_const const
+#endif
+
 /* This is the fundamental function for registering boot/module
    parameters.  perm sets the visibility in sysfs: 000 means it's
    not there, read bits mean it's readable, write bits mean it's
@@ -71,7 +81,7 @@ struct kparam_array
 	static int __param_perm_check_##name __attribute__((unused)) =	\
 	BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2));	\
 	static const char __param_str_##name[] = prefix #name;		\
-	static struct kernel_param const __param_##name			\
+	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
 	= { __param_str_##name, perm, set, get, { arg } }
-- 
cgit 


From fb40bd78b0f91b274879cf5db8facd1e04b6052e Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Wed, 13 Feb 2008 15:03:37 -0800
Subject: Linux Kernel Markers: support multiple probes

RCU style multiple probes support for the Linux Kernel Markers.  Common case
(one probe) is still fast and does not require dynamic allocation or a
supplementary pointer dereference on the fast path.

- Move preempt disable from the marker site to the callback.

Since we now have an internal callback, move the preempt disable/enable to the
callback instead of the marker site.

Since the callback change is done asynchronously (passing from a handler that
supports arguments to a handler that does not setup the arguments is no
arguments are passed), we can safely update it even if it is outside the
preempt disable section.

- Move probe arm to probe connection. Now, a connected probe is automatically
  armed.

Remove MARK_MAX_FORMAT_LEN, unused.

This patch modifies the Linux Kernel Markers API : it removes the probe
"arm/disarm" and changes the probe function prototype : it now expects a
va_list * instead of a "...".

If we want to have more than one probe connected to a marker at a given
time (LTTng, or blktrace, ssytemtap) then we need this patch. Without it,
connecting a second probe handler to a marker will fail.

It allow us, for instance, to do interesting combinations :

Do standard tracing with LTTng and, eventually, to compute statistics
with SystemTAP, or to have a special trigger on an event that would call
a systemtap script which would stop flight recorder tracing.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Mike Mason <mmlnx@us.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: David Smith <dsmith@redhat.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/spufs/sputrace.c |  31 +-
 include/linux/marker.h                       |  59 ++-
 include/linux/module.h                       |   2 +-
 kernel/marker.c                              | 677 ++++++++++++++++++++-------
 kernel/module.c                              |   7 +-
 samples/markers/probe-example.c              |  25 +-
 6 files changed, 565 insertions(+), 236 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 2b1953f6f12e..01974f7776e1 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -146,34 +146,28 @@ static void sputrace_log_item(const char *name, struct spu_context *ctx,
 	wake_up(&sputrace_wait);
 }
 
-static void spu_context_event(const struct marker *mdata,
-		void *private, const char *format, ...)
+static void spu_context_event(void *probe_private, void *call_data,
+		const char *format, va_list *args)
 {
-	struct spu_probe *p = mdata->private;
-	va_list ap;
+	struct spu_probe *p = probe_private;
 	struct spu_context *ctx;
 	struct spu *spu;
 
-	va_start(ap, format);
-	ctx = va_arg(ap, struct spu_context *);
-	spu = va_arg(ap, struct spu *);
+	ctx = va_arg(*args, struct spu_context *);
+	spu = va_arg(*args, struct spu *);
 
 	sputrace_log_item(p->name, ctx, spu);
-	va_end(ap);
 }
 
-static void spu_context_nospu_event(const struct marker *mdata,
-		void *private, const char *format, ...)
+static void spu_context_nospu_event(void *probe_private, void *call_data,
+		const char *format, va_list *args)
 {
-	struct spu_probe *p = mdata->private;
-	va_list ap;
+	struct spu_probe *p = probe_private;
 	struct spu_context *ctx;
 
-	va_start(ap, format);
-	ctx = va_arg(ap, struct spu_context *);
+	ctx = va_arg(*args, struct spu_context *);
 
 	sputrace_log_item(p->name, ctx, NULL);
-	va_end(ap);
 }
 
 struct spu_probe spu_probes[] = {
@@ -219,10 +213,6 @@ static int __init sputrace_init(void)
 		if (error)
 			printk(KERN_INFO "Unable to register probe %s\n",
 					p->name);
-
-		error = marker_arm(p->name);
-		if (error)
-			printk(KERN_INFO "Unable to arm probe %s\n", p->name);
 	}
 
 	return 0;
@@ -238,7 +228,8 @@ static void __exit sputrace_exit(void)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(spu_probes); i++)
-		marker_probe_unregister(spu_probes[i].name);
+		marker_probe_unregister(spu_probes[i].name,
+			spu_probes[i].probe_func, &spu_probes[i]);
 
 	remove_proc_entry("sputrace", NULL);
 	kfree(sputrace_log);
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 5f36cf946bcb..b5f95637f289 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -19,16 +19,23 @@ struct marker;
 
 /**
  * marker_probe_func - Type of a marker probe function
- * @mdata: pointer of type struct marker
- * @private_data: caller site private data
+ * @probe_private: probe private data
+ * @call_private: call site private data
  * @fmt: format string
- * @...: variable argument list
+ * @args: variable argument list pointer. Use a pointer to overcome C's
+ *        inability to pass this around as a pointer in a portable manner in
+ *        the callee otherwise.
  *
  * Type of marker probe functions. They receive the mdata and need to parse the
  * format string to recover the variable argument list.
  */
-typedef void marker_probe_func(const struct marker *mdata,
-	void *private_data, const char *fmt, ...);
+typedef void marker_probe_func(void *probe_private, void *call_private,
+		const char *fmt, va_list *args);
+
+struct marker_probe_closure {
+	marker_probe_func *func;	/* Callback */
+	void *probe_private;		/* Private probe data */
+};
 
 struct marker {
 	const char *name;	/* Marker name */
@@ -36,8 +43,11 @@ struct marker {
 				 * variable argument list.
 				 */
 	char state;		/* Marker state. */
-	marker_probe_func *call;/* Probe handler function pointer */
-	void *private;		/* Private probe data */
+	char ptype;		/* probe type : 0 : single, 1 : multi */
+	void (*call)(const struct marker *mdata,	/* Probe wrapper */
+		void *call_private, const char *fmt, ...);
+	struct marker_probe_closure single;
+	struct marker_probe_closure *multi;
 } __attribute__((aligned(8)));
 
 #ifdef CONFIG_MARKERS
@@ -49,7 +59,7 @@ struct marker {
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __trace_mark(name, call_data, format, args...)			\
+#define __trace_mark(name, call_private, format, args...)		\
 	do {								\
 		static const char __mstrtab_name_##name[]		\
 		__attribute__((section("__markers_strings")))		\
@@ -60,24 +70,23 @@ struct marker {
 		static struct marker __mark_##name			\
 		__attribute__((section("__markers"), aligned(8))) =	\
 		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
-		0, __mark_empty_function, NULL };			\
+		0, 0, marker_probe_cb,					\
+		{ __mark_empty_function, NULL}, NULL };			\
 		__mark_check_format(format, ## args);			\
 		if (unlikely(__mark_##name.state)) {			\
-			preempt_disable();				\
 			(*__mark_##name.call)				\
-				(&__mark_##name, call_data,		\
+				(&__mark_##name, call_private,		\
 				format, ## args);			\
-			preempt_enable();				\
 		}							\
 	} while (0)
 
 extern void marker_update_probe_range(struct marker *begin,
-	struct marker *end, struct module *probe_module, int *refcount);
+	struct marker *end);
 #else /* !CONFIG_MARKERS */
-#define __trace_mark(name, call_data, format, args...) \
+#define __trace_mark(name, call_private, format, args...) \
 		__mark_check_format(format, ## args)
 static inline void marker_update_probe_range(struct marker *begin,
-	struct marker *end, struct module *probe_module, int *refcount)
+	struct marker *end)
 { }
 #endif /* CONFIG_MARKERS */
 
@@ -92,8 +101,6 @@ static inline void marker_update_probe_range(struct marker *begin,
 #define trace_mark(name, format, args...) \
 	__trace_mark(name, NULL, format, ## args)
 
-#define MARK_MAX_FORMAT_LEN	1024
-
 /**
  * MARK_NOARGS - Format string for a marker with no argument.
  */
@@ -106,24 +113,30 @@ static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...)
 
 extern marker_probe_func __mark_empty_function;
 
+extern void marker_probe_cb(const struct marker *mdata,
+	void *call_private, const char *fmt, ...);
+extern void marker_probe_cb_noarg(const struct marker *mdata,
+	void *call_private, const char *fmt, ...);
+
 /*
  * Connect a probe to a marker.
  * private data pointer must be a valid allocated memory address, or NULL.
  */
 extern int marker_probe_register(const char *name, const char *format,
-				marker_probe_func *probe, void *private);
+				marker_probe_func *probe, void *probe_private);
 
 /*
  * Returns the private data given to marker_probe_register.
  */
-extern void *marker_probe_unregister(const char *name);
+extern int marker_probe_unregister(const char *name,
+	marker_probe_func *probe, void *probe_private);
 /*
  * Unregister a marker by providing the registered private data.
  */
-extern void *marker_probe_unregister_private_data(void *private);
+extern int marker_probe_unregister_private_data(marker_probe_func *probe,
+	void *probe_private);
 
-extern int marker_arm(const char *name);
-extern int marker_disarm(const char *name);
-extern void *marker_get_private_data(const char *name);
+extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
+	int num);
 
 #endif
diff --git a/include/linux/module.h b/include/linux/module.h
index ac28e8761e84..330bec08c2c4 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -465,7 +465,7 @@ int unregister_module_notifier(struct notifier_block * nb);
 
 extern void print_modules(void);
 
-extern void module_update_markers(struct module *probe_module, int *refcount);
+extern void module_update_markers(void);
 
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
diff --git a/kernel/marker.c b/kernel/marker.c
index 5323cfaedbce..c4c2cd8b61f5 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -27,22 +27,15 @@
 extern struct marker __start___markers[];
 extern struct marker __stop___markers[];
 
+/* Set to 1 to enable marker debug output */
+const int marker_debug;
+
 /*
  * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
- * and module markers, the hash table and deferred_sync.
+ * and module markers and the hash table.
  */
 static DEFINE_MUTEX(markers_mutex);
 
-/*
- * Marker deferred synchronization.
- * Upon marker probe_unregister, we delay call to synchronize_sched() to
- * accelerate mass unregistration (only when there is no more reference to a
- * given module do we call synchronize_sched()). However, we need to make sure
- * every critical region has ended before we re-arm a marker that has been
- * unregistered and then registered back with a different probe data.
- */
-static int deferred_sync;
-
 /*
  * Marker hash table, containing the active markers.
  * Protected by module_mutex.
@@ -50,12 +43,26 @@ static int deferred_sync;
 #define MARKER_HASH_BITS 6
 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
 
+/*
+ * Note about RCU :
+ * It is used to make sure every handler has finished using its private data
+ * between two consecutive operation (add or remove) on a given marker.  It is
+ * also used to delay the free of multiple probes array until a quiescent state
+ * is reached.
+ * marker entries modifications are protected by the markers_mutex.
+ */
 struct marker_entry {
 	struct hlist_node hlist;
 	char *format;
-	marker_probe_func *probe;
-	void *private;
+	void (*call)(const struct marker *mdata,	/* Probe wrapper */
+		void *call_private, const char *fmt, ...);
+	struct marker_probe_closure single;
+	struct marker_probe_closure *multi;
 	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	char rcu_pending:1;
+	char ptype:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
 
@@ -63,7 +70,8 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
 
 /**
  * __mark_empty_function - Empty probe callback
- * @mdata: pointer of type const struct marker
+ * @probe_private: probe private data
+ * @call_private: call site private data
  * @fmt: format string
  * @...: variable argument list
  *
@@ -72,12 +80,266 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
  * though the function pointer change and the marker enabling are two distinct
  * operations that modifies the execution flow of preemptible code.
  */
-void __mark_empty_function(const struct marker *mdata, void *private,
-	const char *fmt, ...)
+void __mark_empty_function(void *probe_private, void *call_private,
+	const char *fmt, va_list *args)
 {
 }
 EXPORT_SYMBOL_GPL(__mark_empty_function);
 
+/*
+ * marker_probe_cb Callback that prepares the variable argument list for probes.
+ * @mdata: pointer of type struct marker
+ * @call_private: caller site private data
+ * @fmt: format string
+ * @...:  Variable argument list.
+ *
+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
+ * need to put a full smp_rmb() in this branch. This is why we do not use
+ * rcu_dereference() for the pointer read.
+ */
+void marker_probe_cb(const struct marker *mdata, void *call_private,
+	const char *fmt, ...)
+{
+	va_list args;
+	char ptype;
+
+	/*
+	 * disabling preemption to make sure the teardown of the callbacks can
+	 * be done correctly when they are in modules and they insure RCU read
+	 * coherency.
+	 */
+	preempt_disable();
+	ptype = ACCESS_ONCE(mdata->ptype);
+	if (likely(!ptype)) {
+		marker_probe_func *func;
+		/* Must read the ptype before ptr. They are not data dependant,
+		 * so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		func = ACCESS_ONCE(mdata->single.func);
+		/* Must read the ptr before private data. They are not data
+		 * dependant, so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		va_start(args, fmt);
+		func(mdata->single.probe_private, call_private, fmt, &args);
+		va_end(args);
+	} else {
+		struct marker_probe_closure *multi;
+		int i;
+		/*
+		 * multi points to an array, therefore accessing the array
+		 * depends on reading multi. However, even in this case,
+		 * we must insure that the pointer is read _before_ the array
+		 * data. Same as rcu_dereference, but we need a full smp_rmb()
+		 * in the fast path, so put the explicit barrier here.
+		 */
+		smp_read_barrier_depends();
+		multi = ACCESS_ONCE(mdata->multi);
+		for (i = 0; multi[i].func; i++) {
+			va_start(args, fmt);
+			multi[i].func(multi[i].probe_private, call_private, fmt,
+				&args);
+			va_end(args);
+		}
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(marker_probe_cb);
+
+/*
+ * marker_probe_cb Callback that does not prepare the variable argument list.
+ * @mdata: pointer of type struct marker
+ * @call_private: caller site private data
+ * @fmt: format string
+ * @...:  Variable argument list.
+ *
+ * Should be connected to markers "MARK_NOARGS".
+ */
+void marker_probe_cb_noarg(const struct marker *mdata,
+	void *call_private, const char *fmt, ...)
+{
+	va_list args;	/* not initialized */
+	char ptype;
+
+	preempt_disable();
+	ptype = ACCESS_ONCE(mdata->ptype);
+	if (likely(!ptype)) {
+		marker_probe_func *func;
+		/* Must read the ptype before ptr. They are not data dependant,
+		 * so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		func = ACCESS_ONCE(mdata->single.func);
+		/* Must read the ptr before private data. They are not data
+		 * dependant, so we put an explicit smp_rmb() here. */
+		smp_rmb();
+		func(mdata->single.probe_private, call_private, fmt, &args);
+	} else {
+		struct marker_probe_closure *multi;
+		int i;
+		/*
+		 * multi points to an array, therefore accessing the array
+		 * depends on reading multi. However, even in this case,
+		 * we must insure that the pointer is read _before_ the array
+		 * data. Same as rcu_dereference, but we need a full smp_rmb()
+		 * in the fast path, so put the explicit barrier here.
+		 */
+		smp_read_barrier_depends();
+		multi = ACCESS_ONCE(mdata->multi);
+		for (i = 0; multi[i].func; i++)
+			multi[i].func(multi[i].probe_private, call_private, fmt,
+				&args);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
+
+static void free_old_closure(struct rcu_head *head)
+{
+	struct marker_entry *entry = container_of(head,
+		struct marker_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
+static void debug_print_probes(struct marker_entry *entry)
+{
+	int i;
+
+	if (!marker_debug)
+		return;
+
+	if (!entry->ptype) {
+		printk(KERN_DEBUG "Single probe : %p %p\n",
+			entry->single.func,
+			entry->single.probe_private);
+	} else {
+		for (i = 0; entry->multi[i].func; i++)
+			printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
+				entry->multi[i].func,
+				entry->multi[i].probe_private);
+	}
+}
+
+static struct marker_probe_closure *
+marker_entry_add_probe(struct marker_entry *entry,
+		marker_probe_func *probe, void *probe_private)
+{
+	int nr_probes = 0;
+	struct marker_probe_closure *old, *new;
+
+	WARN_ON(!probe);
+
+	debug_print_probes(entry);
+	old = entry->multi;
+	if (!entry->ptype) {
+		if (entry->single.func == probe &&
+				entry->single.probe_private == probe_private)
+			return ERR_PTR(-EBUSY);
+		if (entry->single.func == __mark_empty_function) {
+			/* 0 -> 1 probes */
+			entry->single.func = probe;
+			entry->single.probe_private = probe_private;
+			entry->refcount = 1;
+			entry->ptype = 0;
+			debug_print_probes(entry);
+			return NULL;
+		} else {
+			/* 1 -> 2 probes */
+			nr_probes = 1;
+			old = NULL;
+		}
+	} else {
+		/* (N -> N+1), (N != 0, 1) probes */
+		for (nr_probes = 0; old[nr_probes].func; nr_probes++)
+			if (old[nr_probes].func == probe
+					&& old[nr_probes].probe_private
+						== probe_private)
+				return ERR_PTR(-EBUSY);
+	}
+	/* + 2 : one for new probe, one for NULL func */
+	new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
+			GFP_KERNEL);
+	if (new == NULL)
+		return ERR_PTR(-ENOMEM);
+	if (!old)
+		new[0] = entry->single;
+	else
+		memcpy(new, old,
+			nr_probes * sizeof(struct marker_probe_closure));
+	new[nr_probes].func = probe;
+	new[nr_probes].probe_private = probe_private;
+	entry->refcount = nr_probes + 1;
+	entry->multi = new;
+	entry->ptype = 1;
+	debug_print_probes(entry);
+	return old;
+}
+
+static struct marker_probe_closure *
+marker_entry_remove_probe(struct marker_entry *entry,
+		marker_probe_func *probe, void *probe_private)
+{
+	int nr_probes = 0, nr_del = 0, i;
+	struct marker_probe_closure *old, *new;
+
+	old = entry->multi;
+
+	debug_print_probes(entry);
+	if (!entry->ptype) {
+		/* 0 -> N is an error */
+		WARN_ON(entry->single.func == __mark_empty_function);
+		/* 1 -> 0 probes */
+		WARN_ON(probe && entry->single.func != probe);
+		WARN_ON(entry->single.probe_private != probe_private);
+		entry->single.func = __mark_empty_function;
+		entry->refcount = 0;
+		entry->ptype = 0;
+		debug_print_probes(entry);
+		return NULL;
+	} else {
+		/* (N -> M), (N > 1, M >= 0) probes */
+		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
+			if ((!probe || old[nr_probes].func == probe)
+					&& old[nr_probes].probe_private
+						== probe_private)
+				nr_del++;
+		}
+	}
+
+	if (nr_probes - nr_del == 0) {
+		/* N -> 0, (N > 1) */
+		entry->single.func = __mark_empty_function;
+		entry->refcount = 0;
+		entry->ptype = 0;
+	} else if (nr_probes - nr_del == 1) {
+		/* N -> 1, (N > 1) */
+		for (i = 0; old[i].func; i++)
+			if ((probe && old[i].func != probe) ||
+					old[i].probe_private != probe_private)
+				entry->single = old[i];
+		entry->refcount = 1;
+		entry->ptype = 0;
+	} else {
+		int j = 0;
+		/* N -> M, (N > 1, M > 1) */
+		/* + 1 for NULL */
+		new = kzalloc((nr_probes - nr_del + 1)
+			* sizeof(struct marker_probe_closure), GFP_KERNEL);
+		if (new == NULL)
+			return ERR_PTR(-ENOMEM);
+		for (i = 0; old[i].func; i++)
+			if ((probe && old[i].func != probe) ||
+					old[i].probe_private != probe_private)
+				new[j++] = old[i];
+		entry->refcount = nr_probes - nr_del;
+		entry->ptype = 1;
+		entry->multi = new;
+	}
+	debug_print_probes(entry);
+	return old;
+}
+
 /*
  * Get marker if the marker is present in the marker hash table.
  * Must be called with markers_mutex held.
@@ -102,8 +364,7 @@ static struct marker_entry *get_marker(const char *name)
  * Add the marker to the marker hash table. Must be called with markers_mutex
  * held.
  */
-static int add_marker(const char *name, const char *format,
-	marker_probe_func *probe, void *private)
+static struct marker_entry *add_marker(const char *name, const char *format)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
@@ -118,9 +379,8 @@ static int add_marker(const char *name, const char *format,
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
 			printk(KERN_NOTICE
-				"Marker %s busy, probe %p already installed\n",
-				name, e->probe);
-			return -EBUSY;	/* Already there */
+				"Marker %s busy\n", name);
+			return ERR_PTR(-EBUSY);	/* Already there */
 		}
 	}
 	/*
@@ -130,34 +390,42 @@ static int add_marker(const char *name, const char *format,
 	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
 			GFP_KERNEL);
 	if (!e)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	memcpy(&e->name[0], name, name_len);
 	if (format) {
 		e->format = &e->name[name_len];
 		memcpy(e->format, format, format_len);
+		if (strcmp(e->format, MARK_NOARGS) == 0)
+			e->call = marker_probe_cb_noarg;
+		else
+			e->call = marker_probe_cb;
 		trace_mark(core_marker_format, "name %s format %s",
 				e->name, e->format);
-	} else
+	} else {
 		e->format = NULL;
-	e->probe = probe;
-	e->private = private;
+		e->call = marker_probe_cb;
+	}
+	e->single.func = __mark_empty_function;
+	e->single.probe_private = NULL;
+	e->multi = NULL;
+	e->ptype = 0;
 	e->refcount = 0;
+	e->rcu_pending = 0;
 	hlist_add_head(&e->hlist, head);
-	return 0;
+	return e;
 }
 
 /*
  * Remove the marker from the marker hash table. Must be called with mutex_lock
  * held.
  */
-static void *remove_marker(const char *name)
+static int remove_marker(const char *name)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct marker_entry *e;
 	int found = 0;
 	size_t len = strlen(name) + 1;
-	void *private = NULL;
 	u32 hash = jhash(name, len-1, 0);
 
 	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
@@ -167,12 +435,16 @@ static void *remove_marker(const char *name)
 			break;
 		}
 	}
-	if (found) {
-		private = e->private;
-		hlist_del(&e->hlist);
-		kfree(e);
-	}
-	return private;
+	if (!found)
+		return -ENOENT;
+	if (e->single.func != __mark_empty_function)
+		return -EBUSY;
+	hlist_del(&e->hlist);
+	/* Make sure the call_rcu has been executed */
+	if (e->rcu_pending)
+		rcu_barrier();
+	kfree(e);
+	return 0;
 }
 
 /*
@@ -184,6 +456,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 	size_t name_len = strlen((*entry)->name) + 1;
 	size_t format_len = strlen(format) + 1;
 
+
 	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
 			GFP_KERNEL);
 	if (!e)
@@ -191,11 +464,20 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 	memcpy(&e->name[0], (*entry)->name, name_len);
 	e->format = &e->name[name_len];
 	memcpy(e->format, format, format_len);
-	e->probe = (*entry)->probe;
-	e->private = (*entry)->private;
+	if (strcmp(e->format, MARK_NOARGS) == 0)
+		e->call = marker_probe_cb_noarg;
+	else
+		e->call = marker_probe_cb;
+	e->single = (*entry)->single;
+	e->multi = (*entry)->multi;
+	e->ptype = (*entry)->ptype;
 	e->refcount = (*entry)->refcount;
+	e->rcu_pending = 0;
 	hlist_add_before(&e->hlist, &(*entry)->hlist);
 	hlist_del(&(*entry)->hlist);
+	/* Make sure the call_rcu has been executed */
+	if ((*entry)->rcu_pending)
+		rcu_barrier();
 	kfree(*entry);
 	*entry = e;
 	trace_mark(core_marker_format, "name %s format %s",
@@ -206,7 +488,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
 /*
  * Sets the probe callback corresponding to one marker.
  */
-static int set_marker(struct marker_entry **entry, struct marker *elem)
+static int set_marker(struct marker_entry **entry, struct marker *elem,
+		int active)
 {
 	int ret;
 	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
@@ -226,9 +509,43 @@ static int set_marker(struct marker_entry **entry, struct marker *elem)
 		if (ret)
 			return ret;
 	}
-	elem->call = (*entry)->probe;
-	elem->private = (*entry)->private;
-	elem->state = 1;
+
+	/*
+	 * probe_cb setup (statically known) is done here. It is
+	 * asynchronous with the rest of execution, therefore we only
+	 * pass from a "safe" callback (with argument) to an "unsafe"
+	 * callback (does not set arguments).
+	 */
+	elem->call = (*entry)->call;
+	/*
+	 * Sanity check :
+	 * We only update the single probe private data when the ptr is
+	 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
+	 */
+	WARN_ON(elem->single.func != __mark_empty_function
+		&& elem->single.probe_private
+		!= (*entry)->single.probe_private &&
+		!elem->ptype);
+	elem->single.probe_private = (*entry)->single.probe_private;
+	/*
+	 * Make sure the private data is valid when we update the
+	 * single probe ptr.
+	 */
+	smp_wmb();
+	elem->single.func = (*entry)->single.func;
+	/*
+	 * We also make sure that the new probe callbacks array is consistent
+	 * before setting a pointer to it.
+	 */
+	rcu_assign_pointer(elem->multi, (*entry)->multi);
+	/*
+	 * Update the function or multi probe array pointer before setting the
+	 * ptype.
+	 */
+	smp_wmb();
+	elem->ptype = (*entry)->ptype;
+	elem->state = active;
+
 	return 0;
 }
 
@@ -240,8 +557,12 @@ static int set_marker(struct marker_entry **entry, struct marker *elem)
  */
 static void disable_marker(struct marker *elem)
 {
+	/* leave "call" as is. It is known statically. */
 	elem->state = 0;
-	elem->call = __mark_empty_function;
+	elem->single.func = __mark_empty_function;
+	/* Update the function before setting the ptype */
+	smp_wmb();
+	elem->ptype = 0;	/* single probe */
 	/*
 	 * Leave the private data and id there, because removal is racy and
 	 * should be done only after a synchronize_sched(). These are never used
@@ -253,14 +574,11 @@ static void disable_marker(struct marker *elem)
  * marker_update_probe_range - Update a probe range
  * @begin: beginning of the range
  * @end: end of the range
- * @probe_module: module address of the probe being updated
- * @refcount: number of references left to the given probe_module (out)
  *
  * Updates the probe callback corresponding to a range of markers.
  */
 void marker_update_probe_range(struct marker *begin,
-	struct marker *end, struct module *probe_module,
-	int *refcount)
+	struct marker *end)
 {
 	struct marker *iter;
 	struct marker_entry *mark_entry;
@@ -268,15 +586,12 @@ void marker_update_probe_range(struct marker *begin,
 	mutex_lock(&markers_mutex);
 	for (iter = begin; iter < end; iter++) {
 		mark_entry = get_marker(iter->name);
-		if (mark_entry && mark_entry->refcount) {
-			set_marker(&mark_entry, iter);
+		if (mark_entry) {
+			set_marker(&mark_entry, iter,
+					!!mark_entry->refcount);
 			/*
 			 * ignore error, continue
 			 */
-			if (probe_module)
-				if (probe_module ==
-			__module_text_address((unsigned long)mark_entry->probe))
-					(*refcount)++;
 		} else {
 			disable_marker(iter);
 		}
@@ -289,20 +604,27 @@ void marker_update_probe_range(struct marker *begin,
  * Issues a synchronize_sched() when no reference to the module passed
  * as parameter is found in the probes so the probe module can be
  * safely unloaded from now on.
+ *
+ * Internal callback only changed before the first probe is connected to it.
+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
+ * transitions.  All other transitions will leave the old private data valid.
+ * This makes the non-atomicity of the callback/private data updates valid.
+ *
+ * "special case" updates :
+ * 0 -> 1 callback
+ * 1 -> 0 callback
+ * 1 -> 2 callbacks
+ * 2 -> 1 callbacks
+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
+ * Site effect : marker_set_format may delete the marker entry (creating a
+ * replacement).
  */
-static void marker_update_probes(struct module *probe_module)
+static void marker_update_probes(void)
 {
-	int refcount = 0;
-
 	/* Core kernel markers */
-	marker_update_probe_range(__start___markers,
-			__stop___markers, probe_module, &refcount);
+	marker_update_probe_range(__start___markers, __stop___markers);
 	/* Markers in modules. */
-	module_update_markers(probe_module, &refcount);
-	if (probe_module && refcount == 0) {
-		synchronize_sched();
-		deferred_sync = 0;
-	}
+	module_update_markers();
 }
 
 /**
@@ -310,33 +632,49 @@ static void marker_update_probes(struct module *probe_module)
  * @name: marker name
  * @format: format string
  * @probe: probe handler
- * @private: probe private data
+ * @probe_private: probe private data
  *
  * private data must be a valid allocated memory address, or NULL.
  * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
  */
 int marker_probe_register(const char *name, const char *format,
-			marker_probe_func *probe, void *private)
+			marker_probe_func *probe, void *probe_private)
 {
 	struct marker_entry *entry;
 	int ret = 0;
+	struct marker_probe_closure *old;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
-	if (entry && entry->refcount) {
-		ret = -EBUSY;
-		goto end;
-	}
-	if (deferred_sync) {
-		synchronize_sched();
-		deferred_sync = 0;
+	if (!entry) {
+		entry = add_marker(name, format);
+		if (IS_ERR(entry)) {
+			ret = PTR_ERR(entry);
+			goto end;
+		}
 	}
-	ret = add_marker(name, format, probe, private);
-	if (ret)
+	/*
+	 * If we detect that a call_rcu is pending for this marker,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = marker_entry_add_probe(entry, probe, probe_private);
+	if (IS_ERR(old)) {
+		ret = PTR_ERR(old);
 		goto end;
+	}
 	mutex_unlock(&markers_mutex);
-	marker_update_probes(NULL);
-	return ret;
+	marker_update_probes();		/* may update entry */
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	WARN_ON(!entry);
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu(&entry->rcu, free_old_closure);
 end:
 	mutex_unlock(&markers_mutex);
 	return ret;
@@ -346,171 +684,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register);
 /**
  * marker_probe_unregister -  Disconnect a probe from a marker
  * @name: marker name
+ * @probe: probe function pointer
+ * @probe_private: probe private data
  *
  * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
  */
-void *marker_probe_unregister(const char *name)
+int marker_probe_unregister(const char *name,
+	marker_probe_func *probe, void *probe_private)
 {
-	struct module *probe_module;
 	struct marker_entry *entry;
-	void *private;
+	struct marker_probe_closure *old;
+	int ret = 0;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	if (!entry) {
-		private = ERR_PTR(-ENOENT);
+		ret = -ENOENT;
 		goto end;
 	}
-	entry->refcount = 0;
-	/* In what module is the probe handler ? */
-	probe_module = __module_text_address((unsigned long)entry->probe);
-	private = remove_marker(name);
-	deferred_sync = 1;
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
-	marker_update_probes(probe_module);
-	return private;
+	marker_update_probes();		/* may update entry */
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu(&entry->rcu, free_old_closure);
+	remove_marker(name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-	return private;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(marker_probe_unregister);
 
-/**
- * marker_probe_unregister_private_data -  Disconnect a probe from a marker
- * @private: probe private data
- *
- * Unregister a marker by providing the registered private data.
- * Returns the private data given to marker_probe_register, or an ERR_PTR().
- */
-void *marker_probe_unregister_private_data(void *private)
+static struct marker_entry *
+get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
 {
-	struct module *probe_module;
-	struct hlist_head *head;
-	struct hlist_node *node;
 	struct marker_entry *entry;
-	int found = 0;
 	unsigned int i;
+	struct hlist_head *head;
+	struct hlist_node *node;
 
-	mutex_lock(&markers_mutex);
 	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
 		head = &marker_table[i];
 		hlist_for_each_entry(entry, node, head, hlist) {
-			if (entry->private == private) {
-				found = 1;
-				goto iter_end;
+			if (!entry->ptype) {
+				if (entry->single.func == probe
+						&& entry->single.probe_private
+						== probe_private)
+					return entry;
+			} else {
+				struct marker_probe_closure *closure;
+				closure = entry->multi;
+				for (i = 0; closure[i].func; i++) {
+					if (closure[i].func == probe &&
+							closure[i].probe_private
+							== probe_private)
+						return entry;
+				}
 			}
 		}
 	}
-iter_end:
-	if (!found) {
-		private = ERR_PTR(-ENOENT);
-		goto end;
-	}
-	entry->refcount = 0;
-	/* In what module is the probe handler ? */
-	probe_module = __module_text_address((unsigned long)entry->probe);
-	private = remove_marker(entry->name);
-	deferred_sync = 1;
-	mutex_unlock(&markers_mutex);
-	marker_update_probes(probe_module);
-	return private;
-end:
-	mutex_unlock(&markers_mutex);
-	return private;
+	return NULL;
 }
-EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
 
 /**
- * marker_arm - Arm a marker
- * @name: marker name
+ * marker_probe_unregister_private_data -  Disconnect a probe from a marker
+ * @probe: probe function
+ * @probe_private: probe private data
  *
- * Activate a marker. It keeps a reference count of the number of
- * arming/disarming done.
- * Returns 0 if ok, error value on error.
+ * Unregister a probe by providing the registered private data.
+ * Only removes the first marker found in hash table.
+ * Return 0 on success or error value.
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
  */
-int marker_arm(const char *name)
+int marker_probe_unregister_private_data(marker_probe_func *probe,
+		void *probe_private)
 {
 	struct marker_entry *entry;
 	int ret = 0;
+	struct marker_probe_closure *old;
 
 	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
+	entry = get_marker_from_private_data(probe, probe_private);
 	if (!entry) {
 		ret = -ENOENT;
 		goto end;
 	}
-	/*
-	 * Only need to update probes when refcount passes from 0 to 1.
-	 */
-	if (entry->refcount++)
-		goto end;
-end:
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = marker_entry_remove_probe(entry, NULL, probe_private);
 	mutex_unlock(&markers_mutex);
-	marker_update_probes(NULL);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(marker_arm);
-
-/**
- * marker_disarm - Disarm a marker
- * @name: marker name
- *
- * Disarm a marker. It keeps a reference count of the number of arming/disarming
- * done.
- * Returns 0 if ok, error value on error.
- */
-int marker_disarm(const char *name)
-{
-	struct marker_entry *entry;
-	int ret = 0;
-
+	marker_update_probes();		/* may update entry */
 	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
-	if (!entry) {
-		ret = -ENOENT;
-		goto end;
-	}
-	/*
-	 * Only permit decrement refcount if higher than 0.
-	 * Do probe update only on 1 -> 0 transition.
-	 */
-	if (entry->refcount) {
-		if (--entry->refcount)
-			goto end;
-	} else {
-		ret = -EPERM;
-		goto end;
-	}
+	entry = get_marker_from_private_data(probe, probe_private);
+	WARN_ON(!entry);
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+	call_rcu(&entry->rcu, free_old_closure);
+	remove_marker(entry->name);	/* Ignore busy error message */
 end:
 	mutex_unlock(&markers_mutex);
-	marker_update_probes(NULL);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(marker_disarm);
+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
 
 /**
  * marker_get_private_data - Get a marker's probe private data
  * @name: marker name
+ * @probe: probe to match
+ * @num: get the nth matching probe's private data
  *
+ * Returns the nth private data pointer (starting from 0) matching, or an
+ * ERR_PTR.
  * Returns the private data pointer, or an ERR_PTR.
  * The private data pointer should _only_ be dereferenced if the caller is the
  * owner of the data, or its content could vanish. This is mostly used to
  * confirm that a caller is the owner of a registered probe.
  */
-void *marker_get_private_data(const char *name)
+void *marker_get_private_data(const char *name, marker_probe_func *probe,
+		int num)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct marker_entry *e;
 	size_t name_len = strlen(name) + 1;
 	u32 hash = jhash(name, name_len-1, 0);
-	int found = 0;
+	int i;
 
 	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
 	hlist_for_each_entry(e, node, head, hlist) {
 		if (!strcmp(name, e->name)) {
-			found = 1;
-			return e->private;
+			if (!e->ptype) {
+				if (num == 0 && e->single.func == probe)
+					return e->single.probe_private;
+				else
+					break;
+			} else {
+				struct marker_probe_closure *closure;
+				int match = 0;
+				closure = e->multi;
+				for (i = 0; closure[i].func; i++) {
+					if (closure[i].func != probe)
+						continue;
+					if (match++ == num)
+						return closure[i].probe_private;
+				}
+			}
 		}
 	}
 	return ERR_PTR(-ENOENT);
diff --git a/kernel/module.c b/kernel/module.c
index 4202da97a1da..92595bad3812 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2038,7 +2038,7 @@ static struct module *load_module(void __user *umod,
 #ifdef CONFIG_MARKERS
 	if (!mod->taints)
 		marker_update_probe_range(mod->markers,
-			mod->markers + mod->num_markers, NULL, NULL);
+			mod->markers + mod->num_markers);
 #endif
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2564,7 +2564,7 @@ EXPORT_SYMBOL(struct_module);
 #endif
 
 #ifdef CONFIG_MARKERS
-void module_update_markers(struct module *probe_module, int *refcount)
+void module_update_markers(void)
 {
 	struct module *mod;
 
@@ -2572,8 +2572,7 @@ void module_update_markers(struct module *probe_module, int *refcount)
 	list_for_each_entry(mod, &modules, list)
 		if (!mod->taints)
 			marker_update_probe_range(mod->markers,
-				mod->markers + mod->num_markers,
-				probe_module, refcount);
+				mod->markers + mod->num_markers);
 	mutex_unlock(&module_mutex);
 }
 #endif
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
index a36797535615..c8e099d4d1fd 100644
--- a/samples/markers/probe-example.c
+++ b/samples/markers/probe-example.c
@@ -20,31 +20,27 @@ struct probe_data {
 	marker_probe_func *probe_func;
 };
 
-void probe_subsystem_event(const struct marker *mdata, void *private,
-	const char *format, ...)
+void probe_subsystem_event(void *probe_data, void *call_data,
+	const char *format, va_list *args)
 {
-	va_list ap;
 	/* Declare args */
 	unsigned int value;
 	const char *mystr;
 
 	/* Assign args */
-	va_start(ap, format);
-	value = va_arg(ap, typeof(value));
-	mystr = va_arg(ap, typeof(mystr));
+	value = va_arg(*args, typeof(value));
+	mystr = va_arg(*args, typeof(mystr));
 
 	/* Call printk */
-	printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
+	printk(KERN_INFO "Value %u, string %s\n", value, mystr);
 
 	/* or count, check rights, serialize data in a buffer */
-
-	va_end(ap);
 }
 
 atomic_t eventb_count = ATOMIC_INIT(0);
 
-void probe_subsystem_eventb(const struct marker *mdata, void *private,
-	const char *format, ...)
+void probe_subsystem_eventb(void *probe_data, void *call_data,
+	const char *format, va_list *args)
 {
 	/* Increment counter */
 	atomic_inc(&eventb_count);
@@ -72,10 +68,6 @@ static int __init probe_init(void)
 		if (result)
 			printk(KERN_INFO "Unable to register probe %s\n",
 				probe_array[i].name);
-		result = marker_arm(probe_array[i].name);
-		if (result)
-			printk(KERN_INFO "Unable to arm probe %s\n",
-				probe_array[i].name);
 	}
 	return 0;
 }
@@ -85,7 +77,8 @@ static void __exit probe_fini(void)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(probe_array); i++)
-		marker_probe_unregister(probe_array[i].name);
+		marker_probe_unregister(probe_array[i].name,
+			probe_array[i].probe_func, &probe_array[i]);
 	printk(KERN_INFO "Number of event b : %u\n",
 			atomic_read(&eventb_count));
 }
-- 
cgit 


From b2e3e658b344c6bcfb8fb694100ab2f2b5b2edb0 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Wed, 13 Feb 2008 15:03:39 -0800
Subject: Linux Kernel Markers: create modpost file

This adds some new magic in the MODPOST phase for CONFIG_MARKERS.  Analogous
to the Module.symvers file, the build will now write a Module.markers file
when CONFIG_MARKERS=y is set.  This file lists the name, defining module, and
format string of each marker, separated by \t characters.  This simple text
file can be used by offline build procedures for instrumentation code,
analogous to how System.map and Module.symvers can be useful to have for
kernels other than the one you are running right now.

The strings are made easy to extract by having the __trace_mark macro define
the name and format together in a single array called __mstrtab_* in the
__markers_strings section.  This is straightforward and reliable as long as
the marker structs are always defined by this macro.  It is an unreasonable
amount of hairy work to extract the string pointers from the __markers section
structs, which entails handling a relocation type for every machine under the
sun.

Mathieu :
- Ran through checkpatch.pl

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: David Smith <dsmith@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/marker.h   |   9 +--
 scripts/Makefile.modpost |  11 ++++
 scripts/mod/modpost.c    | 164 ++++++++++++++++++++++++++++++++++++++++++++++-
 scripts/mod/modpost.h    |   3 +
 4 files changed, 180 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/marker.h b/include/linux/marker.h
index b5f95637f289..5df879dc3776 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -61,15 +61,12 @@ struct marker {
  */
 #define __trace_mark(name, call_private, format, args...)		\
 	do {								\
-		static const char __mstrtab_name_##name[]		\
+		static const char __mstrtab_##name[]			\
 		__attribute__((section("__markers_strings")))		\
-		= #name;						\
-		static const char __mstrtab_format_##name[]		\
-		__attribute__((section("__markers_strings")))		\
-		= format;						\
+		= #name "\0" format;					\
 		static struct marker __mark_##name			\
 		__attribute__((section("__markers"), aligned(8))) =	\
-		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
 		0, 0, marker_probe_cb,					\
 		{ __mark_empty_function, NULL}, NULL };			\
 		__mark_check_format(format, ## args);			\
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 65e707e1ffc3..cfc004e04417 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -13,6 +13,7 @@
 # 2) modpost is then used to
 # 3)  create one <module>.mod.c file pr. module
 # 4)  create one Module.symvers file with CRC for all exported symbols
+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
 # 5) compile all <module>.mod.c files
 # 6) final link of the module to a <module.ko> file
 
@@ -45,6 +46,10 @@ include scripts/Makefile.lib
 
 kernelsymfile := $(objtree)/Module.symvers
 modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+kernelmarkersfile := $(objtree)/Module.markers
+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
+
+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
 
 # Step 1), find all modules listed in $(MODVERDIR)/
 __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -63,6 +68,8 @@ modpost = scripts/mod/modpost                    \
  $(if $(KBUILD_EXTMOD),-I $(modulesymfile))      \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
  $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S)      \
+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
+ $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
  $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
 
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
@@ -82,6 +89,10 @@ vmlinux.o: FORCE
 $(symverfile):         __modpost ;
 $(modules:.ko=.mod.c): __modpost ;
 
+ifdef CONFIG_MARKERS
+$(markersfile):	       __modpost ;
+endif
+
 
 # Step 5), compile all *.mod.c files
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index dbe1fb5e8cc0..61742771c65d 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -11,6 +11,8 @@
  * Usage: modpost vmlinux module1.o module2.o ...
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
@@ -435,6 +437,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
 			info->export_unused_gpl_sec = i;
 		else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
 			info->export_gpl_future_sec = i;
+		else if (strcmp(secname, "__markers_strings") == 0)
+			info->markers_strings_sec = i;
 
 		if (sechdrs[i].sh_type != SHT_SYMTAB)
 			continue;
@@ -1470,6 +1474,62 @@ static void check_sec_ref(struct module *mod, const char *modname,
 	}
 }
 
+static void get_markers(struct elf_info *info, struct module *mod)
+{
+	const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
+	const char *strings = (const char *) info->hdr + sh->sh_offset;
+	const Elf_Sym *sym, *first_sym, *last_sym;
+	size_t n;
+
+	if (!info->markers_strings_sec)
+		return;
+
+	/*
+	 * First count the strings.  We look for all the symbols defined
+	 * in the __markers_strings section named __mstrtab_*.  For
+	 * these local names, the compiler puts a random .NNN suffix on,
+	 * so the names don't correspond exactly.
+	 */
+	first_sym = last_sym = NULL;
+	n = 0;
+	for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec &&
+		    !strncmp(info->strtab + sym->st_name,
+			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+			if (first_sym == NULL)
+				first_sym = sym;
+			last_sym = sym;
+			++n;
+		}
+
+	if (n == 0)
+		return;
+
+	/*
+	 * Now collect each name and format into a line for the output.
+	 * Lines look like:
+	 *	marker_name	vmlinux	marker %s format %d
+	 * The format string after the second \t can use whitespace.
+	 */
+	mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
+	mod->nmarkers = n;
+
+	n = 0;
+	for (sym = first_sym; sym <= last_sym; sym++)
+		if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+		    sym->st_shndx == info->markers_strings_sec &&
+		    !strncmp(info->strtab + sym->st_name,
+			     "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+			const char *name = strings + sym->st_value;
+			const char *fmt = strchr(name, '\0') + 1;
+			char *line = NULL;
+			asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+			NOFAIL(line);
+			mod->markers[n++] = line;
+		}
+}
+
 static void read_symbols(char *modname)
 {
 	const char *symname;
@@ -1521,6 +1581,8 @@ static void read_symbols(char *modname)
 		get_src_version(modname, mod->srcversion,
 				sizeof(mod->srcversion)-1);
 
+	get_markers(&info, mod);
+
 	parse_elf_finish(&info);
 
 	/* Our trick to get versioning for struct_module - it's
@@ -1867,16 +1929,104 @@ static void write_dump(const char *fname)
 	write_if_changed(&buf, fname);
 }
 
+static void add_marker(struct module *mod, const char *name, const char *fmt)
+{
+	char *line = NULL;
+	asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+	NOFAIL(line);
+
+	mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
+						     sizeof mod->markers[0])));
+	mod->markers[mod->nmarkers++] = line;
+}
+
+static void read_markers(const char *fname)
+{
+	unsigned long size, pos = 0;
+	void *file = grab_file(fname, &size);
+	char *line;
+
+	if (!file)		/* No old markers, silently ignore */
+		return;
+
+	while ((line = get_next_line(&pos, file, size))) {
+		char *marker, *modname, *fmt;
+		struct module *mod;
+
+		marker = line;
+		modname = strchr(marker, '\t');
+		if (!modname)
+			goto fail;
+		*modname++ = '\0';
+		fmt = strchr(modname, '\t');
+		if (!fmt)
+			goto fail;
+		*fmt++ = '\0';
+		if (*marker == '\0' || *modname == '\0')
+			goto fail;
+
+		mod = find_module(modname);
+		if (!mod) {
+			if (is_vmlinux(modname))
+				have_vmlinux = 1;
+			mod = new_module(NOFAIL(strdup(modname)));
+			mod->skip = 1;
+		}
+
+		add_marker(mod, marker, fmt);
+	}
+	return;
+fail:
+	fatal("parse error in markers list file\n");
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+	return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void write_markers(const char *fname)
+{
+	struct buffer buf = { };
+	struct module *mod;
+	size_t i;
+
+	for (mod = modules; mod; mod = mod->next)
+		if ((!external_module || !mod->skip) && mod->markers != NULL) {
+			/*
+			 * Sort the strings so we can skip duplicates when
+			 * we write them out.
+			 */
+			qsort(mod->markers, mod->nmarkers,
+			      sizeof mod->markers[0], &compare_strings);
+			for (i = 0; i < mod->nmarkers; ++i) {
+				char *line = mod->markers[i];
+				buf_write(&buf, line, strlen(line));
+				while (i + 1 < mod->nmarkers &&
+				       !strcmp(mod->markers[i],
+					       mod->markers[i + 1]))
+					free(mod->markers[i++]);
+				free(mod->markers[i]);
+			}
+			free(mod->markers);
+			mod->markers = NULL;
+		}
+
+	write_if_changed(&buf, fname);
+}
+
 int main(int argc, char **argv)
 {
 	struct module *mod;
 	struct buffer buf = { };
 	char *kernel_read = NULL, *module_read = NULL;
 	char *dump_write = NULL;
+	char *markers_read = NULL;
+	char *markers_write = NULL;
 	int opt;
 	int err;
 
-	while ((opt = getopt(argc, argv, "i:I:msSo:aw")) != -1) {
+	while ((opt = getopt(argc, argv, "i:I:msSo:awM:K:")) != -1) {
 		switch (opt) {
 		case 'i':
 			kernel_read = optarg;
@@ -1903,6 +2053,12 @@ int main(int argc, char **argv)
 		case 'w':
 			warn_unresolved = 1;
 			break;
+			case 'M':
+				markers_write = optarg;
+				break;
+			case 'K':
+				markers_read = optarg;
+				break;
 		default:
 			exit(1);
 		}
@@ -1950,5 +2106,11 @@ int main(int argc, char **argv)
 		     "'make CONFIG_DEBUG_SECTION_MISMATCH=y'\n",
 		     sec_mismatch_count);
 
+	if (markers_read)
+		read_markers(markers_read);
+
+	if (markers_write)
+		write_markers(markers_write);
+
 	return err;
 }
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 999f15e0e008..565c5872407e 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -112,6 +112,8 @@ struct module {
 	int has_init;
 	int has_cleanup;
 	struct buffer dev_table_buf;
+	char **markers;
+	size_t nmarkers;
 	char	     srcversion[25];
 };
 
@@ -126,6 +128,7 @@ struct elf_info {
 	Elf_Section  export_gpl_sec;
 	Elf_Section  export_unused_gpl_sec;
 	Elf_Section  export_gpl_future_sec;
+	Elf_Section  markers_strings_sec;
 	const char   *strtab;
 	char	     *modinfo;
 	unsigned int modinfo_len;
-- 
cgit 


From 4fcb2fcd4d0678b8ae103d257dcb28074cbfc7fa Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 11 Feb 2008 17:46:31 -0800
Subject: ACPI, cpuidle: Clarify C-state description in sysfs

Add a new sysfs entry under cpuidle states. desc - can be used by driver to
communicate to userspace any specific information about the state.
This helps in identifying the exact hardware C-states behind the ACPI C-state
definition.

Idea is to export this through powertop, which will help to map the C-state
reported by powertop to actual hardware C-state.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/cstate.c |  2 ++
 drivers/acpi/processor_idle.c | 11 +++++++++++
 drivers/cpuidle/cpuidle.c     |  3 ++-
 drivers/cpuidle/sysfs.c       | 14 +++++++++++---
 include/acpi/processor.h      |  9 ++++++---
 include/linux/cpuidle.h       |  2 ++
 6 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 10b67170b133..8ca3557a6d59 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -126,6 +126,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 		printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
 		       "state\n", cx->type);
 	}
+	snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+		 cx->address);
 
 out:
 	set_cpus_allowed(current, saved_mask);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 32003fdc91e8..baa389b908e2 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -945,11 +945,16 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 				 * Otherwise, ignore this info and continue.
 				 */
 				cx.entry_method = ACPI_CSTATE_HALT;
+				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
 			} else {
 				continue;
 			}
+		} else {
+			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
+				 cx.address);
 		}
 
+
 		obj = &(element->package.elements[2]);
 		if (obj->type != ACPI_TYPE_INTEGER)
 			continue;
@@ -1643,6 +1648,11 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 		return -EINVAL;
 	}
 
+	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
+		dev->states[i].name[0] = '\0';
+		dev->states[i].desc[0] = '\0';
+	}
+
 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
 		cx = &pr->power.states[i];
 		state = &dev->states[count];
@@ -1659,6 +1669,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 		cpuidle_set_statedata(state, cx);
 
 		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
+		strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
 		state->exit_latency = cx->latency;
 		state->target_residency = cx->latency * latency_factor;
 		state->power_usage = cx->power;
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 60f71e6345e3..d73663a52324 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -219,7 +219,8 @@ static void poll_idle_init(struct cpuidle_device *dev)
 
 	cpuidle_set_statedata(state, NULL);
 
-	snprintf(state->name, CPUIDLE_NAME_LEN, "C0 (poll idle)");
+	snprintf(state->name, CPUIDLE_NAME_LEN, "C0");
+	snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
 	state->exit_latency = 0;
 	state->target_residency = 0;
 	state->power_usage = -1;
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 088ea74edd34..69102ca05685 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -218,16 +218,23 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
 	return sprintf(buf, "%u\n", state->_name);\
 }
 
-static ssize_t show_state_name(struct cpuidle_state *state, char *buf)
-{
-	return sprintf(buf, "%s\n", state->name);
+#define define_show_state_str_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
+{ \
+	if (state->_name[0] == '\0')\
+		return sprintf(buf, "<null>\n");\
+	return sprintf(buf, "%s\n", state->_name);\
 }
 
 define_show_state_function(exit_latency)
 define_show_state_function(power_usage)
 define_show_state_function(usage)
 define_show_state_function(time)
+define_show_state_str_function(name)
+define_show_state_str_function(desc)
+
 define_one_state_ro(name, show_state_name);
+define_one_state_ro(desc, show_state_desc);
 define_one_state_ro(latency, show_state_exit_latency);
 define_one_state_ro(power, show_state_power_usage);
 define_one_state_ro(usage, show_state_usage);
@@ -235,6 +242,7 @@ define_one_state_ro(time, show_state_time);
 
 static struct attribute *cpuidle_state_default_attrs[] = {
 	&attr_name.attr,
+	&attr_desc.attr,
 	&attr_latency.attr,
 	&attr_power.attr,
 	&attr_usage.attr,
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index cdc8004cfd12..06480bcabfdc 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -32,9 +32,11 @@
 #define DOMAIN_COORD_TYPE_SW_ANY	0xfd
 #define DOMAIN_COORD_TYPE_HW_ALL	0xfe
 
-#define ACPI_CSTATE_SYSTEMIO	(0)
-#define ACPI_CSTATE_FFH		(1)
-#define ACPI_CSTATE_HALT	(2)
+#define ACPI_CSTATE_SYSTEMIO	0
+#define ACPI_CSTATE_FFH		1
+#define ACPI_CSTATE_HALT	2
+
+#define ACPI_CX_DESC_LEN	32
 
 /* Power Management */
 
@@ -74,6 +76,7 @@ struct acpi_processor_cx {
 	u64 time;
 	struct acpi_processor_cx_policy promotion;
 	struct acpi_processor_cx_policy demotion;
+	char desc[ACPI_CX_DESC_LEN];
 };
 
 struct acpi_processor_power {
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 385d45b616db..6b72a4584086 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -19,6 +19,7 @@
 
 #define CPUIDLE_STATE_MAX	8
 #define CPUIDLE_NAME_LEN	16
+#define CPUIDLE_DESC_LEN	32
 
 struct cpuidle_device;
 
@@ -29,6 +30,7 @@ struct cpuidle_device;
 
 struct cpuidle_state {
 	char		name[CPUIDLE_NAME_LEN];
+	char		desc[CPUIDLE_DESC_LEN];
 	void		*driver_data;
 
 	unsigned int	flags;
-- 
cgit 


From b9482378916abb9a1e0a2334187cdc67f2deda2c Mon Sep 17 00:00:00 2001
From: Adrian McMenamin <adrian@newgolddream.dyndns.info>
Date: Wed, 6 Feb 2008 22:46:21 +0000
Subject: maple: fix up whitespace damage.

This patch is fundamentally about fixing up the whitespace problems
introduced by my previous patch (that brought the code into mainline). A
second patch will follow that will fix memory leaks. The two need to be
applied sequentially.

Signed-off-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/maple/maple.c | 978 ++++++++++++++++++++++++-----------------------
 include/linux/maple.h    | 101 ++---
 2 files changed, 543 insertions(+), 536 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index e52a6296ca46..9c48ccc44c29 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -57,8 +57,8 @@ static int started, scanning, liststatus;
 static struct kmem_cache *maple_queue_cache;
 
 struct maple_device_specify {
-       int port;
-       int unit;
+	int port;
+	int unit;
 };
 
 /**
@@ -68,22 +68,23 @@ struct maple_device_specify {
  */
 int maple_driver_register(struct device_driver *drv)
 {
-       if (!drv)
-               return -EINVAL;
-       drv->bus = &maple_bus_type;
-       return driver_register(drv);
+	if (!drv)
+		return -EINVAL;
+	drv->bus = &maple_bus_type;
+	return driver_register(drv);
 }
+
 EXPORT_SYMBOL_GPL(maple_driver_register);
 
 /* set hardware registers to enable next round of dma */
 static void maplebus_dma_reset(void)
 {
-       ctrl_outl(MAPLE_MAGIC, MAPLE_RESET);
-       /* set trig type to 0 for software trigger, 1 for hardware (VBLANK) */
-       ctrl_outl(1, MAPLE_TRIGTYPE);
-       ctrl_outl(MAPLE_2MBPS | MAPLE_TIMEOUT(50000), MAPLE_SPEED);
-       ctrl_outl(PHYSADDR(maple_sendbuf), MAPLE_DMAADDR);
-       ctrl_outl(1, MAPLE_ENABLE);
+	ctrl_outl(MAPLE_MAGIC, MAPLE_RESET);
+	/* set trig type to 0 for software trigger, 1 for hardware (VBLANK) */
+	ctrl_outl(1, MAPLE_TRIGTYPE);
+	ctrl_outl(MAPLE_2MBPS | MAPLE_TIMEOUT(50000), MAPLE_SPEED);
+	ctrl_outl(PHYSADDR(maple_sendbuf), MAPLE_DMAADDR);
+	ctrl_outl(1, MAPLE_ENABLE);
 }
 
 /**
@@ -94,27 +95,28 @@ static void maplebus_dma_reset(void)
  * @function: the function code for the device
  */
 void maple_getcond_callback(struct maple_device *dev,
-                           void (*callback) (struct mapleq * mq),
-                           unsigned long interval, unsigned long function)
+			    void (*callback) (struct mapleq * mq),
+			    unsigned long interval, unsigned long function)
 {
-       dev->callback = callback;
-       dev->interval = interval;
-       dev->function = cpu_to_be32(function);
-       dev->when = jiffies;
+	dev->callback = callback;
+	dev->interval = interval;
+	dev->function = cpu_to_be32(function);
+	dev->when = jiffies;
 }
+
 EXPORT_SYMBOL_GPL(maple_getcond_callback);
 
 static int maple_dma_done(void)
 {
-       return (ctrl_inl(MAPLE_STATE) & 1) == 0;
+	return (ctrl_inl(MAPLE_STATE) & 1) == 0;
 }
 
 static void maple_release_device(struct device *dev)
 {
-       if (dev->type) {
-               kfree(dev->type->name);
-               kfree(dev->type);
-       }
+	if (dev->type) {
+		kfree(dev->type->name);
+		kfree(dev->type);
+	}
 }
 
 /**
@@ -123,60 +125,61 @@ static void maple_release_device(struct device *dev)
  */
 void maple_add_packet(struct mapleq *mq)
 {
-       mutex_lock(&maple_list_lock);
-       list_add(&mq->list, &maple_waitq);
-       mutex_unlock(&maple_list_lock);
+	mutex_lock(&maple_list_lock);
+	list_add(&mq->list, &maple_waitq);
+	mutex_unlock(&maple_list_lock);
 }
+
 EXPORT_SYMBOL_GPL(maple_add_packet);
 
 static struct mapleq *maple_allocq(struct maple_device *dev)
 {
-       struct mapleq *mq;
+	struct mapleq *mq;
 
-       mq = kmalloc(sizeof(*mq), GFP_KERNEL);
-       if (!mq)
-               return NULL;
+	mq = kmalloc(sizeof(*mq), GFP_KERNEL);
+	if (!mq)
+		return NULL;
 
-       mq->dev = dev;
-       mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL);
-       mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp);
-       if (!mq->recvbuf) {
-               kfree(mq);
-               return NULL;
-       }
+	mq->dev = dev;
+	mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL);
+	mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp);
+	if (!mq->recvbuf) {
+		kfree(mq);
+		return NULL;
+	}
 
-       return mq;
+	return mq;
 }
 
 static struct maple_device *maple_alloc_dev(int port, int unit)
 {
-       struct maple_device *dev;
+	struct maple_device *dev;
 
-       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-       if (!dev)
-               return NULL;
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
 
-       dev->port = port;
-       dev->unit = unit;
-       dev->mq = maple_allocq(dev);
+	dev->port = port;
+	dev->unit = unit;
+	dev->mq = maple_allocq(dev);
 
-       if (!dev->mq) {
-               kfree(dev);
-               return NULL;
-       }
+	if (!dev->mq) {
+		kfree(dev);
+		return NULL;
+	}
 
-       return dev;
+	return dev;
 }
 
 static void maple_free_dev(struct maple_device *mdev)
 {
-       if (!mdev)
-               return;
-       if (mdev->mq) {
-               kmem_cache_free(maple_queue_cache, mdev->mq->recvbufdcsp);
-               kfree(mdev->mq);
-       }
-       kfree(mdev);
+	if (!mdev)
+		return;
+	if (mdev->mq) {
+		kmem_cache_free(maple_queue_cache, mdev->mq->recvbufdcsp);
+		kfree(mdev->mq);
+	}
+	kfree(mdev);
 }
 
 /* process the command queue into a maple command block
@@ -184,153 +187,153 @@ static void maple_free_dev(struct maple_device *mdev)
  */
 static void maple_build_block(struct mapleq *mq)
 {
-       int port, unit, from, to, len;
-       unsigned long *lsendbuf = mq->sendbuf;
+	int port, unit, from, to, len;
+	unsigned long *lsendbuf = mq->sendbuf;
 
-       port = mq->dev->port & 3;
-       unit = mq->dev->unit;
-       len = mq->length;
-       from = port << 6;
-       to = (port << 6) | (unit > 0 ? (1 << (unit - 1)) & 0x1f : 0x20);
+	port = mq->dev->port & 3;
+	unit = mq->dev->unit;
+	len = mq->length;
+	from = port << 6;
+	to = (port << 6) | (unit > 0 ? (1 << (unit - 1)) & 0x1f : 0x20);
 
-       *maple_lastptr &= 0x7fffffff;
-       maple_lastptr = maple_sendptr;
+	*maple_lastptr &= 0x7fffffff;
+	maple_lastptr = maple_sendptr;
 
-       *maple_sendptr++ = (port << 16) | len | 0x80000000;
-       *maple_sendptr++ = PHYSADDR(mq->recvbuf);
-       *maple_sendptr++ =
-           mq->command | (to << 8) | (from << 16) | (len << 24);
+	*maple_sendptr++ = (port << 16) | len | 0x80000000;
+	*maple_sendptr++ = PHYSADDR(mq->recvbuf);
+	*maple_sendptr++ =
+	    mq->command | (to << 8) | (from << 16) | (len << 24);
 
-       while (len-- > 0)
-               *maple_sendptr++ = *lsendbuf++;
+	while (len-- > 0)
+		*maple_sendptr++ = *lsendbuf++;
 }
 
 /* build up command queue */
 static void maple_send(void)
 {
-       int i;
-       int maple_packets;
-       struct mapleq *mq, *nmq;
-
-       if (!list_empty(&maple_sentq))
-               return;
-       if (list_empty(&maple_waitq) || !maple_dma_done())
-               return;
-       maple_packets = 0;
-       maple_sendptr = maple_lastptr = maple_sendbuf;
-       list_for_each_entry_safe(mq, nmq, &maple_waitq, list) {
-               maple_build_block(mq);
-               list_move(&mq->list, &maple_sentq);
-               if (maple_packets++ > MAPLE_MAXPACKETS)
-                       break;
-       }
-       if (maple_packets > 0) {
-               for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
-                       dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE,
-                                      PAGE_SIZE, DMA_BIDIRECTIONAL);
-       }
+	int i;
+	int maple_packets;
+	struct mapleq *mq, *nmq;
+
+	if (!list_empty(&maple_sentq))
+		return;
+	if (list_empty(&maple_waitq) || !maple_dma_done())
+		return;
+	maple_packets = 0;
+	maple_sendptr = maple_lastptr = maple_sendbuf;
+	list_for_each_entry_safe(mq, nmq, &maple_waitq, list) {
+		maple_build_block(mq);
+		list_move(&mq->list, &maple_sentq);
+		if (maple_packets++ > MAPLE_MAXPACKETS)
+			break;
+	}
+	if (maple_packets > 0) {
+		for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
+			dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE,
+				       PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
 }
 
 static int attach_matching_maple_driver(struct device_driver *driver,
-                                       void *devptr)
+					void *devptr)
 {
-       struct maple_driver *maple_drv;
-       struct maple_device *mdev;
-
-       mdev = devptr;
-       maple_drv = to_maple_driver(driver);
-       if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) {
-               if (maple_drv->connect(mdev) == 0) {
-                       mdev->driver = maple_drv;
-                       return 1;
-               }
-       }
-       return 0;
+	struct maple_driver *maple_drv;
+	struct maple_device *mdev;
+
+	mdev = devptr;
+	maple_drv = to_maple_driver(driver);
+	if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) {
+		if (maple_drv->connect(mdev) == 0) {
+			mdev->driver = maple_drv;
+			return 1;
+		}
+	}
+	return 0;
 }
 
 static void maple_detach_driver(struct maple_device *mdev)
 {
-       if (!mdev)
-               return;
-       if (mdev->driver) {
-               if (mdev->driver->disconnect)
-                       mdev->driver->disconnect(mdev);
-       }
-       mdev->driver = NULL;
-       if (mdev->registered) {
-               maple_release_device(&mdev->dev);
-               device_unregister(&mdev->dev);
-       }
-       mdev->registered = 0;
-       maple_free_dev(mdev);
+	if (!mdev)
+		return;
+	if (mdev->driver) {
+		if (mdev->driver->disconnect)
+			mdev->driver->disconnect(mdev);
+	}
+	mdev->driver = NULL;
+	if (mdev->registered) {
+		maple_release_device(&mdev->dev);
+		device_unregister(&mdev->dev);
+	}
+	mdev->registered = 0;
+	maple_free_dev(mdev);
 }
 
 /* process initial MAPLE_COMMAND_DEVINFO for each device or port */
 static void maple_attach_driver(struct maple_device *dev)
 {
-       char *p;
-
-       char *recvbuf;
-       unsigned long function;
-       int matched, retval;
-
-       recvbuf = dev->mq->recvbuf;
-       memcpy(&dev->devinfo, recvbuf + 4, sizeof(dev->devinfo));
-       memcpy(dev->product_name, dev->devinfo.product_name, 30);
-       memcpy(dev->product_licence, dev->devinfo.product_licence, 60);
-       dev->product_name[30] = '\0';
-       dev->product_licence[60] = '\0';
-
-       for (p = dev->product_name + 29; dev->product_name <= p; p--)
-               if (*p == ' ')
-                       *p = '\0';
-               else
-                       break;
-
-       for (p = dev->product_licence + 59; dev->product_licence <= p; p--)
-               if (*p == ' ')
-                       *p = '\0';
-               else
-                       break;
-
-       function = be32_to_cpu(dev->devinfo.function);
-
-       if (function > 0x200) {
-               /* Do this silently - as not a real device */
-               function = 0;
-               dev->driver = &maple_dummy_driver;
-               sprintf(dev->dev.bus_id, "%d:0.port", dev->port);
-       } else {
-               printk(KERN_INFO
-                      "Maple bus at (%d, %d): Connected function 0x%lX\n",
-                      dev->port, dev->unit, function);
-
-               matched =
-                   bus_for_each_drv(&maple_bus_type, NULL, dev,
-                                    attach_matching_maple_driver);
-
-               if (matched == 0) {
-                       /* Driver does not exist yet */
-                       printk(KERN_INFO
-                              "No maple driver found for this device\n");
-                       dev->driver = &maple_dummy_driver;
-               }
-
-               sprintf(dev->dev.bus_id, "%d:0%d.%lX", dev->port,
-                       dev->unit, function);
-       }
-       dev->function = function;
-       dev->dev.bus = &maple_bus_type;
-       dev->dev.parent = &maple_bus;
-       dev->dev.release = &maple_release_device;
-       retval = device_register(&dev->dev);
-       if (retval) {
-               printk(KERN_INFO
-                      "Maple bus: Attempt to register device (%x, %x) failed.\n",
-                      dev->port, dev->unit);
-               maple_free_dev(dev);
-       }
-       dev->registered = 1;
+	char *p;
+
+	char *recvbuf;
+	unsigned long function;
+	int matched, retval;
+
+	recvbuf = dev->mq->recvbuf;
+	memcpy(&dev->devinfo, recvbuf + 4, sizeof(dev->devinfo));
+	memcpy(dev->product_name, dev->devinfo.product_name, 30);
+	memcpy(dev->product_licence, dev->devinfo.product_licence, 60);
+	dev->product_name[30] = '\0';
+	dev->product_licence[60] = '\0';
+
+	for (p = dev->product_name + 29; dev->product_name <= p; p--)
+		if (*p == ' ')
+			*p = '\0';
+		else
+			break;
+
+	for (p = dev->product_licence + 59; dev->product_licence <= p; p--)
+		if (*p == ' ')
+			*p = '\0';
+		else
+			break;
+
+	function = be32_to_cpu(dev->devinfo.function);
+
+	if (function > 0x200) {
+		/* Do this silently - as not a real device */
+		function = 0;
+		dev->driver = &maple_dummy_driver;
+		sprintf(dev->dev.bus_id, "%d:0.port", dev->port);
+	} else {
+		printk(KERN_INFO
+		       "Maple bus at (%d, %d): Connected function 0x%lX\n",
+		       dev->port, dev->unit, function);
+
+		matched =
+		    bus_for_each_drv(&maple_bus_type, NULL, dev,
+				     attach_matching_maple_driver);
+
+		if (matched == 0) {
+			/* Driver does not exist yet */
+			printk(KERN_INFO
+			       "No maple driver found for this device\n");
+			dev->driver = &maple_dummy_driver;
+		}
+
+		sprintf(dev->dev.bus_id, "%d:0%d.%lX", dev->port,
+			dev->unit, function);
+	}
+	dev->function = function;
+	dev->dev.bus = &maple_bus_type;
+	dev->dev.parent = &maple_bus;
+	dev->dev.release = &maple_release_device;
+	retval = device_register(&dev->dev);
+	if (retval) {
+		printk(KERN_INFO
+		       "Maple bus: Attempt to register device (%x, %x) failed.\n",
+		       dev->port, dev->unit);
+		maple_free_dev(dev);
+	}
+	dev->registered = 1;
 }
 
 /*
@@ -340,270 +343,271 @@ static void maple_attach_driver(struct maple_device *dev)
  */
 static int detach_maple_device(struct device *device, void *portptr)
 {
-       struct maple_device_specify *ds;
-       struct maple_device *mdev;
-
-       ds = portptr;
-       mdev = to_maple_dev(device);
-       if (mdev->port == ds->port && mdev->unit == ds->unit)
-               return 1;
-       return 0;
+	struct maple_device_specify *ds;
+	struct maple_device *mdev;
+
+	ds = portptr;
+	mdev = to_maple_dev(device);
+	if (mdev->port == ds->port && mdev->unit == ds->unit)
+		return 1;
+	return 0;
 }
 
 static int setup_maple_commands(struct device *device, void *ignored)
 {
-       struct maple_device *maple_dev = to_maple_dev(device);
-
-       if ((maple_dev->interval > 0)
-           && time_after(jiffies, maple_dev->when)) {
-               maple_dev->when = jiffies + maple_dev->interval;
-               maple_dev->mq->command = MAPLE_COMMAND_GETCOND;
-               maple_dev->mq->sendbuf = &maple_dev->function;
-               maple_dev->mq->length = 1;
-               maple_add_packet(maple_dev->mq);
-               liststatus++;
-       } else {
-               if (time_after(jiffies, maple_pnp_time)) {
-                       maple_dev->mq->command = MAPLE_COMMAND_DEVINFO;
-                       maple_dev->mq->length = 0;
-                       maple_add_packet(maple_dev->mq);
-                       liststatus++;
-               }
-       }
-
-       return 0;
+	struct maple_device *maple_dev = to_maple_dev(device);
+
+	if ((maple_dev->interval > 0)
+	    && time_after(jiffies, maple_dev->when)) {
+		maple_dev->when = jiffies + maple_dev->interval;
+		maple_dev->mq->command = MAPLE_COMMAND_GETCOND;
+		maple_dev->mq->sendbuf = &maple_dev->function;
+		maple_dev->mq->length = 1;
+		maple_add_packet(maple_dev->mq);
+		liststatus++;
+	} else {
+		if (time_after(jiffies, maple_pnp_time)) {
+			maple_dev->mq->command = MAPLE_COMMAND_DEVINFO;
+			maple_dev->mq->length = 0;
+			maple_add_packet(maple_dev->mq);
+			liststatus++;
+		}
+	}
+
+	return 0;
 }
 
 /* VBLANK bottom half - implemented via workqueue */
 static void maple_vblank_handler(struct work_struct *work)
 {
-       if (!maple_dma_done())
-               return;
-       if (!list_empty(&maple_sentq))
-               return;
-       ctrl_outl(0, MAPLE_ENABLE);
-       liststatus = 0;
-       bus_for_each_dev(&maple_bus_type, NULL, NULL,
-                        setup_maple_commands);
-       if (time_after(jiffies, maple_pnp_time))
-               maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL;
-       if (liststatus && list_empty(&maple_sentq)) {
-               INIT_LIST_HEAD(&maple_sentq);
-               maple_send();
-       }
-       maplebus_dma_reset();
+	if (!maple_dma_done())
+		return;
+	if (!list_empty(&maple_sentq))
+		return;
+	ctrl_outl(0, MAPLE_ENABLE);
+	liststatus = 0;
+	bus_for_each_dev(&maple_bus_type, NULL, NULL,
+			 setup_maple_commands);
+	if (time_after(jiffies, maple_pnp_time))
+		maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL;
+	if (liststatus && list_empty(&maple_sentq)) {
+		INIT_LIST_HEAD(&maple_sentq);
+		maple_send();
+	}
+	maplebus_dma_reset();
 }
 
 /* handle devices added via hotplugs - placing them on queue for DEVINFO*/
 static void maple_map_subunits(struct maple_device *mdev, int submask)
 {
-       int retval, k, devcheck;
-       struct maple_device *mdev_add;
-       struct maple_device_specify ds;
-
-       for (k = 0; k < 5; k++) {
-               ds.port = mdev->port;
-               ds.unit = k + 1;
-               retval =
-                   bus_for_each_dev(&maple_bus_type, NULL, &ds,
-                                    detach_maple_device);
-               if (retval) {
-                       submask = submask >> 1;
-                       continue;
-               }
-               devcheck = submask & 0x01;
-               if (devcheck) {
-                       mdev_add = maple_alloc_dev(mdev->port, k + 1);
-                       if (!mdev_add)
-                               return;
-                       mdev_add->mq->command = MAPLE_COMMAND_DEVINFO;
-                       mdev_add->mq->length = 0;
-                       maple_add_packet(mdev_add->mq);
-                       scanning = 1;
-               }
-               submask = submask >> 1;
-       }
+	int retval, k, devcheck;
+	struct maple_device *mdev_add;
+	struct maple_device_specify ds;
+
+	for (k = 0; k < 5; k++) {
+		ds.port = mdev->port;
+		ds.unit = k + 1;
+		retval =
+		    bus_for_each_dev(&maple_bus_type, NULL, &ds,
+				     detach_maple_device);
+		if (retval) {
+			submask = submask >> 1;
+			continue;
+		}
+		devcheck = submask & 0x01;
+		if (devcheck) {
+			mdev_add = maple_alloc_dev(mdev->port, k + 1);
+			if (!mdev_add)
+				return;
+			mdev_add->mq->command = MAPLE_COMMAND_DEVINFO;
+			mdev_add->mq->length = 0;
+			maple_add_packet(mdev_add->mq);
+			scanning = 1;
+		}
+		submask = submask >> 1;
+	}
 }
 
 /* mark a device as removed */
 static void maple_clean_submap(struct maple_device *mdev)
 {
-       int killbit;
+	int killbit;
 
-       killbit = (mdev->unit > 0 ? (1 << (mdev->unit - 1)) & 0x1f : 0x20);
-       killbit = ~killbit;
-       killbit &= 0xFF;
-       subdevice_map[mdev->port] = subdevice_map[mdev->port] & killbit;
+	killbit = (mdev->unit > 0 ? (1 << (mdev->unit - 1)) & 0x1f : 0x20);
+	killbit = ~killbit;
+	killbit &= 0xFF;
+	subdevice_map[mdev->port] = subdevice_map[mdev->port] & killbit;
 }
 
 /* handle empty port or hotplug removal */
 static void maple_response_none(struct maple_device *mdev,
-                               struct mapleq *mq)
+				struct mapleq *mq)
 {
-       if (mdev->unit != 0) {
-               list_del(&mq->list);
-               maple_clean_submap(mdev);
-               printk(KERN_INFO
-                      "Maple bus device detaching at (%d, %d)\n",
-                      mdev->port, mdev->unit);
-               maple_detach_driver(mdev);
-               return;
-       }
-       if (!started) {
-               printk(KERN_INFO "No maple devices attached to port %d\n",
-                      mdev->port);
-               return;
-       }
-       maple_clean_submap(mdev);
+	if (mdev->unit != 0) {
+		list_del(&mq->list);
+		maple_clean_submap(mdev);
+		printk(KERN_INFO
+		       "Maple bus device detaching at (%d, %d)\n",
+		       mdev->port, mdev->unit);
+		maple_detach_driver(mdev);
+		return;
+	}
+	if (!started) {
+		printk(KERN_INFO "No maple devices attached to port %d\n",
+		       mdev->port);
+		return;
+	}
+	maple_clean_submap(mdev);
 }
 
 /* preprocess hotplugs or scans */
 static void maple_response_devinfo(struct maple_device *mdev,
-                                  char *recvbuf)
+				   char *recvbuf)
 {
-       char submask;
-       if ((!started) || (scanning == 2)) {
-               maple_attach_driver(mdev);
-               return;
-       }
-       if (mdev->unit == 0) {
-               submask = recvbuf[2] & 0x1F;
-               if (submask ^ subdevice_map[mdev->port]) {
-                       maple_map_subunits(mdev, submask);
-                       subdevice_map[mdev->port] = submask;
-               }
-       }
+	char submask;
+	if ((!started) || (scanning == 2)) {
+		maple_attach_driver(mdev);
+		return;
+	}
+	if (mdev->unit == 0) {
+		submask = recvbuf[2] & 0x1F;
+		if (submask ^ subdevice_map[mdev->port]) {
+			maple_map_subunits(mdev, submask);
+			subdevice_map[mdev->port] = submask;
+		}
+	}
 }
 
 /* maple dma end bottom half - implemented via workqueue */
 static void maple_dma_handler(struct work_struct *work)
 {
-       struct mapleq *mq, *nmq;
-       struct maple_device *dev;
-       char *recvbuf;
-       enum maple_code code;
-
-       if (!maple_dma_done())
-               return;
-       ctrl_outl(0, MAPLE_ENABLE);
-       if (!list_empty(&maple_sentq)) {
-               list_for_each_entry_safe(mq, nmq, &maple_sentq, list) {
-                       recvbuf = mq->recvbuf;
-                       code = recvbuf[0];
-                       dev = mq->dev;
-                       switch (code) {
-                       case MAPLE_RESPONSE_NONE:
-                               maple_response_none(dev, mq);
-                               break;
-
-                       case MAPLE_RESPONSE_DEVINFO:
-                               maple_response_devinfo(dev, recvbuf);
-                               break;
-
-                       case MAPLE_RESPONSE_DATATRF:
-                               if (dev->callback)
-                                       dev->callback(mq);
-                               break;
-
-                       case MAPLE_RESPONSE_FILEERR:
-                       case MAPLE_RESPONSE_AGAIN:
-                       case MAPLE_RESPONSE_BADCMD:
-                       case MAPLE_RESPONSE_BADFUNC:
-                               printk(KERN_DEBUG
-                                      "Maple non-fatal error 0x%X\n",
-                                      code);
-                               break;
-
-                       case MAPLE_RESPONSE_ALLINFO:
-                               printk(KERN_DEBUG
-                                      "Maple - extended device information not supported\n");
-                               break;
-
-                       case MAPLE_RESPONSE_OK:
-                               break;
-
-                       default:
-                               break;
-                       }
-               }
-               INIT_LIST_HEAD(&maple_sentq);
-               if (scanning == 1) {
-                       maple_send();
-                       scanning = 2;
-               } else
-                       scanning = 0;
-
-               if (started == 0)
-                       started = 1;
-       }
-       maplebus_dma_reset();
+	struct mapleq *mq, *nmq;
+	struct maple_device *dev;
+	char *recvbuf;
+	enum maple_code code;
+
+	if (!maple_dma_done())
+		return;
+	ctrl_outl(0, MAPLE_ENABLE);
+	if (!list_empty(&maple_sentq)) {
+		list_for_each_entry_safe(mq, nmq, &maple_sentq, list) {
+			recvbuf = mq->recvbuf;
+			code = recvbuf[0];
+			dev = mq->dev;
+			switch (code) {
+			case MAPLE_RESPONSE_NONE:
+				maple_response_none(dev, mq);
+				break;
+
+			case MAPLE_RESPONSE_DEVINFO:
+				maple_response_devinfo(dev, recvbuf);
+				break;
+
+			case MAPLE_RESPONSE_DATATRF:
+				if (dev->callback)
+					dev->callback(mq);
+				break;
+
+			case MAPLE_RESPONSE_FILEERR:
+			case MAPLE_RESPONSE_AGAIN:
+			case MAPLE_RESPONSE_BADCMD:
+			case MAPLE_RESPONSE_BADFUNC:
+				printk(KERN_DEBUG
+				       "Maple non-fatal error 0x%X\n",
+				       code);
+				break;
+
+			case MAPLE_RESPONSE_ALLINFO:
+				printk(KERN_DEBUG
+				       "Maple - extended device information not supported\n");
+				break;
+
+			case MAPLE_RESPONSE_OK:
+				break;
+
+			default:
+				break;
+			}
+		}
+		INIT_LIST_HEAD(&maple_sentq);
+		if (scanning == 1) {
+			maple_send();
+			scanning = 2;
+		} else
+			scanning = 0;
+
+		if (started == 0)
+			started = 1;
+	}
+	maplebus_dma_reset();
 }
 
 static irqreturn_t maplebus_dma_interrupt(int irq, void *dev_id)
 {
-       /* Load everything into the bottom half */
-       schedule_work(&maple_dma_process);
-       return IRQ_HANDLED;
+	/* Load everything into the bottom half */
+	schedule_work(&maple_dma_process);
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t maplebus_vblank_interrupt(int irq, void *dev_id)
 {
-       schedule_work(&maple_vblank_process);
-       return IRQ_HANDLED;
+	schedule_work(&maple_vblank_process);
+	return IRQ_HANDLED;
 }
 
 static struct irqaction maple_dma_irq = {
-       .name = "maple bus DMA handler",
-       .handler = maplebus_dma_interrupt,
-       .flags = IRQF_SHARED,
+	.name = "maple bus DMA handler",
+	.handler = maplebus_dma_interrupt,
+	.flags = IRQF_SHARED,
 };
 
 static struct irqaction maple_vblank_irq = {
-       .name = "maple bus VBLANK handler",
-       .handler = maplebus_vblank_interrupt,
-       .flags = IRQF_SHARED,
+	.name = "maple bus VBLANK handler",
+	.handler = maplebus_vblank_interrupt,
+	.flags = IRQF_SHARED,
 };
 
 static int maple_set_dma_interrupt_handler(void)
 {
-       return setup_irq(HW_EVENT_MAPLE_DMA, &maple_dma_irq);
+	return setup_irq(HW_EVENT_MAPLE_DMA, &maple_dma_irq);
 }
 
 static int maple_set_vblank_interrupt_handler(void)
 {
-       return setup_irq(HW_EVENT_VSYNC, &maple_vblank_irq);
+	return setup_irq(HW_EVENT_VSYNC, &maple_vblank_irq);
 }
 
 static int maple_get_dma_buffer(void)
 {
-       maple_sendbuf =
-           (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                     MAPLE_DMA_PAGES);
-       if (!maple_sendbuf)
-               return -ENOMEM;
-       return 0;
+	maple_sendbuf =
+	    (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+				      MAPLE_DMA_PAGES);
+	if (!maple_sendbuf)
+		return -ENOMEM;
+	return 0;
 }
 
 static int match_maple_bus_driver(struct device *devptr,
-                                 struct device_driver *drvptr)
+				  struct device_driver *drvptr)
 {
-       struct maple_driver *maple_drv;
-       struct maple_device *maple_dev;
-
-       maple_drv = container_of(drvptr, struct maple_driver, drv);
-       maple_dev = container_of(devptr, struct maple_device, dev);
-       /* Trap empty port case */
-       if (maple_dev->devinfo.function == 0xFFFFFFFF)
-               return 0;
-       else if (maple_dev->devinfo.function &
-                be32_to_cpu(maple_drv->function))
-               return 1;
-       return 0;
+	struct maple_driver *maple_drv;
+	struct maple_device *maple_dev;
+
+	maple_drv = container_of(drvptr, struct maple_driver, drv);
+	maple_dev = container_of(devptr, struct maple_device, dev);
+	/* Trap empty port case */
+	if (maple_dev->devinfo.function == 0xFFFFFFFF)
+		return 0;
+	else if (maple_dev->devinfo.function &
+		 be32_to_cpu(maple_drv->function))
+		return 1;
+	return 0;
 }
 
-static int maple_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int maple_bus_uevent(struct device *dev,
+			    struct kobj_uevent_env *env)
 {
-       return 0;
+	return 0;
 }
 
 static void maple_bus_release(struct device *dev)
@@ -611,124 +615,126 @@ static void maple_bus_release(struct device *dev)
 }
 
 static struct maple_driver maple_dummy_driver = {
-       .drv = {
-               .name = "maple_dummy_driver",
-               .bus =  &maple_bus_type,
-       },
+	.drv = {
+		.name = "maple_dummy_driver",
+		.bus = &maple_bus_type,
+		},
 };
 
 struct bus_type maple_bus_type = {
-       .name =         "maple",
-       .match =        match_maple_bus_driver,
-       .uevent =       maple_bus_uevent,
+	.name = "maple",
+	.match = match_maple_bus_driver,
+	.uevent = maple_bus_uevent,
 };
+
 EXPORT_SYMBOL_GPL(maple_bus_type);
 
 static struct device maple_bus = {
-       .bus_id = "maple",
-       .release = maple_bus_release,
+	.bus_id = "maple",
+	.release = maple_bus_release,
 };
 
 static int __init maple_bus_init(void)
 {
-       int retval, i;
-       struct maple_device *mdev[MAPLE_PORTS];
-       ctrl_outl(0, MAPLE_STATE);
-
-       retval = device_register(&maple_bus);
-       if (retval)
-               goto cleanup;
-
-       retval = bus_register(&maple_bus_type);
-       if (retval)
-               goto cleanup_device;
-
-       retval = driver_register(&maple_dummy_driver.drv);
-
-       if (retval)
-               goto cleanup_bus;
-
-       /* allocate memory for maple bus dma */
-       retval = maple_get_dma_buffer();
-       if (retval) {
-               printk(KERN_INFO
-                      "Maple bus: Failed to allocate Maple DMA buffers\n");
-               goto cleanup_basic;
-       }
-
-       /* set up DMA interrupt handler */
-       retval = maple_set_dma_interrupt_handler();
-       if (retval) {
-               printk(KERN_INFO
-                      "Maple bus: Failed to grab maple DMA IRQ\n");
-               goto cleanup_dma;
-       }
-
-       /* set up VBLANK interrupt handler */
-       retval = maple_set_vblank_interrupt_handler();
-       if (retval) {
-               printk(KERN_INFO "Maple bus: Failed to grab VBLANK IRQ\n");
-               goto cleanup_irq;
-       }
-
-       maple_queue_cache =
-           kmem_cache_create("maple_queue_cache", 0x400, 0,
-                             SLAB_HWCACHE_ALIGN, NULL);
-
-       if (!maple_queue_cache)
-               goto cleanup_bothirqs;
-
-       /* setup maple ports */
-       for (i = 0; i < MAPLE_PORTS; i++) {
-               mdev[i] = maple_alloc_dev(i, 0);
-               if (!mdev[i]) {
-                       while (i-- > 0)
-                               maple_free_dev(mdev[i]);
-                       goto cleanup_cache;
-               }
-               mdev[i]->registered = 0;
-               mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO;
-               mdev[i]->mq->length = 0;
-               maple_attach_driver(mdev[i]);
-               maple_add_packet(mdev[i]->mq);
-               subdevice_map[i] = 0;
-       }
-
-       /* setup maplebus hardware */
-       maplebus_dma_reset();
-
-       /* initial detection */
-       maple_send();
-
-       maple_pnp_time = jiffies;
-
-       printk(KERN_INFO "Maple bus core now registered.\n");
-
-       return 0;
-
-cleanup_cache:
-       kmem_cache_destroy(maple_queue_cache);
-
-cleanup_bothirqs:
-       free_irq(HW_EVENT_VSYNC, 0);
-
-cleanup_irq:
-       free_irq(HW_EVENT_MAPLE_DMA, 0);
-
-cleanup_dma:
-       free_pages((unsigned long) maple_sendbuf, MAPLE_DMA_PAGES);
-
-cleanup_basic:
-       driver_unregister(&maple_dummy_driver.drv);
-
-cleanup_bus:
-       bus_unregister(&maple_bus_type);
-
-cleanup_device:
-       device_unregister(&maple_bus);
-
-cleanup:
-       printk(KERN_INFO "Maple bus registration failed\n");
-       return retval;
+	int retval, i;
+	struct maple_device *mdev[MAPLE_PORTS];
+	ctrl_outl(0, MAPLE_STATE);
+
+	retval = device_register(&maple_bus);
+	if (retval)
+		goto cleanup;
+
+	retval = bus_register(&maple_bus_type);
+	if (retval)
+		goto cleanup_device;
+
+	retval = driver_register(&maple_dummy_driver.drv);
+
+	if (retval)
+		goto cleanup_bus;
+
+	/* allocate memory for maple bus dma */
+	retval = maple_get_dma_buffer();
+	if (retval) {
+		printk(KERN_INFO
+		       "Maple bus: Failed to allocate Maple DMA buffers\n");
+		goto cleanup_basic;
+	}
+
+	/* set up DMA interrupt handler */
+	retval = maple_set_dma_interrupt_handler();
+	if (retval) {
+		printk(KERN_INFO
+		       "Maple bus: Failed to grab maple DMA IRQ\n");
+		goto cleanup_dma;
+	}
+
+	/* set up VBLANK interrupt handler */
+	retval = maple_set_vblank_interrupt_handler();
+	if (retval) {
+		printk(KERN_INFO "Maple bus: Failed to grab VBLANK IRQ\n");
+		goto cleanup_irq;
+	}
+
+	maple_queue_cache =
+	    kmem_cache_create("maple_queue_cache", 0x400, 0,
+			      SLAB_HWCACHE_ALIGN, NULL);
+
+	if (!maple_queue_cache)
+		goto cleanup_bothirqs;
+
+	/* setup maple ports */
+	for (i = 0; i < MAPLE_PORTS; i++) {
+		mdev[i] = maple_alloc_dev(i, 0);
+		if (!mdev[i]) {
+			while (i-- > 0)
+				maple_free_dev(mdev[i]);
+			goto cleanup_cache;
+		}
+		mdev[i]->registered = 0;
+		mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO;
+		mdev[i]->mq->length = 0;
+		maple_attach_driver(mdev[i]);
+		maple_add_packet(mdev[i]->mq);
+		subdevice_map[i] = 0;
+	}
+
+	/* setup maplebus hardware */
+	maplebus_dma_reset();
+
+	/* initial detection */
+	maple_send();
+
+	maple_pnp_time = jiffies;
+
+	printk(KERN_INFO "Maple bus core now registered.\n");
+
+	return 0;
+
+      cleanup_cache:
+	kmem_cache_destroy(maple_queue_cache);
+
+      cleanup_bothirqs:
+	free_irq(HW_EVENT_VSYNC, 0);
+
+      cleanup_irq:
+	free_irq(HW_EVENT_MAPLE_DMA, 0);
+
+      cleanup_dma:
+	free_pages((unsigned long) maple_sendbuf, MAPLE_DMA_PAGES);
+
+      cleanup_basic:
+	driver_unregister(&maple_dummy_driver.drv);
+
+      cleanup_bus:
+	bus_unregister(&maple_bus_type);
+
+      cleanup_device:
+	device_unregister(&maple_bus);
+
+      cleanup:
+	printk(KERN_INFO "Maple bus registration failed\n");
+	return retval;
 }
+
 subsys_initcall(maple_bus_init);
diff --git a/include/linux/maple.h b/include/linux/maple.h
index bad9a7b319de..f82c17b77158 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -7,74 +7,75 @@ extern struct bus_type maple_bus_type;
 
 /* Maple Bus command and response codes */
 enum maple_code {
-       MAPLE_RESPONSE_FILEERR = -5,
-       MAPLE_RESPONSE_AGAIN = -4,      /* request should be retransmitted */
-       MAPLE_RESPONSE_BADCMD = -3,
-       MAPLE_RESPONSE_BADFUNC = -2,
-       MAPLE_RESPONSE_NONE = -1,       /* unit didn't respond at all */
-       MAPLE_COMMAND_DEVINFO = 1,
-       MAPLE_COMMAND_ALLINFO = 2,
-       MAPLE_COMMAND_RESET = 3,
-       MAPLE_COMMAND_KILL = 4,
-       MAPLE_RESPONSE_DEVINFO = 5,
-       MAPLE_RESPONSE_ALLINFO = 6,
-       MAPLE_RESPONSE_OK = 7,
-       MAPLE_RESPONSE_DATATRF = 8,
-       MAPLE_COMMAND_GETCOND = 9,
-       MAPLE_COMMAND_GETMINFO = 10,
-       MAPLE_COMMAND_BREAD = 11,
-       MAPLE_COMMAND_BWRITE = 12,
-       MAPLE_COMMAND_SETCOND = 14
+	MAPLE_RESPONSE_FILEERR = -5,
+	MAPLE_RESPONSE_AGAIN = -4,	/* request should be retransmitted */
+	MAPLE_RESPONSE_BADCMD = -3,
+	MAPLE_RESPONSE_BADFUNC = -2,
+	MAPLE_RESPONSE_NONE = -1,	/* unit didn't respond at all */
+	MAPLE_COMMAND_DEVINFO = 1,
+	MAPLE_COMMAND_ALLINFO = 2,
+	MAPLE_COMMAND_RESET = 3,
+	MAPLE_COMMAND_KILL = 4,
+	MAPLE_RESPONSE_DEVINFO = 5,
+	MAPLE_RESPONSE_ALLINFO = 6,
+	MAPLE_RESPONSE_OK = 7,
+	MAPLE_RESPONSE_DATATRF = 8,
+	MAPLE_COMMAND_GETCOND = 9,
+	MAPLE_COMMAND_GETMINFO = 10,
+	MAPLE_COMMAND_BREAD = 11,
+	MAPLE_COMMAND_BWRITE = 12,
+	MAPLE_COMMAND_SETCOND = 14
 };
 
 struct mapleq {
-       struct list_head list;
-       struct maple_device *dev;
-       void *sendbuf, *recvbuf, *recvbufdcsp;
-       unsigned char length;
-       enum maple_code command;
+	struct list_head list;
+	struct maple_device *dev;
+	void *sendbuf, *recvbuf, *recvbufdcsp;
+	unsigned char length;
+	enum maple_code command;
 };
 
 struct maple_devinfo {
-       unsigned long function;
-       unsigned long function_data[3];
-       unsigned char area_code;
-       unsigned char connector_directon;
-       char product_name[31];
-       char product_licence[61];
-       unsigned short standby_power;
-       unsigned short max_power;
+	unsigned long function;
+	unsigned long function_data[3];
+	unsigned char area_code;
+	unsigned char connector_directon;
+	char product_name[31];
+	char product_licence[61];
+	unsigned short standby_power;
+	unsigned short max_power;
 };
 
 struct maple_device {
-       struct maple_driver *driver;
-       struct mapleq *mq;
-       void *private_data;
-       void (*callback) (struct mapleq * mq);
-       unsigned long when, interval, function;
-       struct maple_devinfo devinfo;
-       unsigned char port, unit;
-       char product_name[32];
-       char product_licence[64];
-       int registered;
-       struct device dev;
+	struct maple_driver *driver;
+	struct mapleq *mq;
+	void *private_data;
+	void (*callback) (struct mapleq * mq);
+	unsigned long when, interval, function;
+	struct maple_devinfo devinfo;
+	unsigned char port, unit;
+	char product_name[32];
+	char product_licence[64];
+	int registered;
+	struct device dev;
 };
 
 struct maple_driver {
-       unsigned long function;
-       int (*connect) (struct maple_device * dev);
-       void (*disconnect) (struct maple_device * dev);
-       struct device_driver drv;
+	unsigned long function;
+	int (*connect) (struct maple_device * dev);
+	void (*disconnect) (struct maple_device * dev);
+	struct device_driver drv;
+	int registered;
 };
 
 void maple_getcond_callback(struct maple_device *dev,
-                           void (*callback) (struct mapleq * mq),
-                           unsigned long interval,
-                           unsigned long function);
+			    void (*callback) (struct mapleq * mq),
+			    unsigned long interval,
+			    unsigned long function);
 int maple_driver_register(struct device_driver *drv);
 void maple_add_packet(struct mapleq *mq);
 
 #define to_maple_dev(n) container_of(n, struct maple_device, dev)
 #define to_maple_driver(n) container_of(n, struct maple_driver, drv)
 
-#endif /* __LINUX_MAPLE_H */
+#endif				/* __LINUX_MAPLE_H */
-- 
cgit 


From 87153058b2e3bedfd339dbfec5dd6dd3d98677b0 Mon Sep 17 00:00:00 2001
From: Adrian McMenamin <adrian@newgolddream.dyndns.info>
Date: Wed, 6 Feb 2008 23:59:56 +0000
Subject: maple: Drop unused prototypes from linux/maple.h.

This patch removes the now unneeded registration check variable from
struct maple_device. (This patch assumes the include/linux/maple.h file
has already been patched for whitespace errors by
http://lkml.org/lkml/2008/2/6/327)

Signed-off-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/maple.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/maple.h b/include/linux/maple.h
index f82c17b77158..35c3474ccf68 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -56,7 +56,6 @@ struct maple_device {
 	unsigned char port, unit;
 	char product_name[32];
 	char product_licence[64];
-	int registered;
 	struct device dev;
 };
 
-- 
cgit 


From 5c8f82c64941594cdab53bf9f9a66c190781f4f6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 Feb 2008 19:44:53 +0900
Subject: maple: Fix up maple build failure.

maple_devinfo->connector_direction had a typo, fix it up..

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/maple.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/maple.h b/include/linux/maple.h
index 35c3474ccf68..3f01e2bae1a1 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -39,7 +39,7 @@ struct maple_devinfo {
 	unsigned long function;
 	unsigned long function_data[3];
 	unsigned char area_code;
-	unsigned char connector_directon;
+	unsigned char connector_direction;
 	char product_name[31];
 	char product_licence[61];
 	unsigned short standby_power;
-- 
cgit 


From 46c1fbdb7191bf07979d7cd5f08d1a86458181a2 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 13 Feb 2008 23:13:25 -0500
Subject: ACPI: DMI: quirk for FSC ESPRIMO Mobile V5505

http://bugzilla.kernel.org/show_bug.cgi?id=9939

Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/blacklist.c | 14 ++++++++++++++
 drivers/acpi/osl.c       |  2 +-
 include/linux/acpi.h     |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index dfa4ac8e9982..ea92bac42c53 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -186,6 +186,12 @@ static int __init dmi_unknown_osi_linux(const struct dmi_system_id *d)
 	acpi_dmi_osi_linux(-1, d);	/* unknown */
 	return 0;
 }
+static int __init dmi_disable_osi_vista(const struct dmi_system_id *d)
+{
+	printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
+	acpi_osi_setup("!Windows 2006");
+	return 0;
+}
 
 /*
  * Most BIOS that invoke OSI(Linux) do nothing with it.
@@ -434,6 +440,14 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
 		},
 	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Fujitsu Siemens",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+	 	     DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile V5505"),
+		},
+	},
 	/*
 	 * Disable OSI(Linux) warnings on all "Hewlett-Packard"
 	 *
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 15e602377655..0467171dbdb8 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1109,7 +1109,7 @@ void __init acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d)
  * string starting with '!' disables that string
  * otherwise string is added to list, augmenting built-in strings
  */
-static int __init acpi_osi_setup(char *str)
+int __init acpi_osi_setup(char *str)
 {
 	if (str == NULL || *str == '\0') {
 		printk(KERN_INFO PREFIX "_OSI method disabled\n");
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ddbe7efe590e..2c7e003356ac 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -203,6 +203,7 @@ extern bool wmi_has_guid(const char *guid);
 extern int acpi_blacklisted(void);
 #ifdef CONFIG_DMI
 extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
+extern int acpi_osi_setup(char *str);
 #endif
 
 #ifdef CONFIG_ACPI_NUMA
-- 
cgit 


From 5a7780e725d1bb4c3094fcc12f1c5c5faea1e988 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Feb 2008 09:20:43 +0100
Subject: hrtimer: check relative timeouts for overflow

Various user space callers ask for relative timeouts. While we fixed
that overflow issue in hrtimer_start(), the sites which convert
relative user space values to absolute timeouts themself were uncovered.

Instead of putting overflow checks into each place add a function
which does the sanity checking and convert all affected callers to use
it.

Thanks to Frans Pop, who reported the problem and tested the fixes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Frans Pop <elendil@planet.nl>
---
 include/linux/ktime.h |  2 ++
 kernel/futex.c        |  2 +-
 kernel/futex_compat.c |  2 +-
 kernel/hrtimer.c      | 37 ++++++++++++++++++++-----------------
 kernel/posix-timers.c |  8 +++++---
 5 files changed, 29 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 36c542b70c6d..2cd7fa73d1af 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -310,6 +310,8 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec)
 	return ktime_sub_ns(kt, usec * 1000);
 }
 
+extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
diff --git a/kernel/futex.c b/kernel/futex.c
index a6baaec44b8f..221f2128a437 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2116,7 +2116,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 
 		t = timespec_to_ktime(ts);
 		if (cmd == FUTEX_WAIT)
-			t = ktime_add(ktime_get(), t);
+			t = ktime_add_safe(ktime_get(), t);
 		tp = &t;
 	}
 	/*
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 133d558db452..7d5e4b016f39 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -176,7 +176,7 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 
 		t = timespec_to_ktime(ts);
 		if (cmd == FUTEX_WAIT)
-			t = ktime_add(ktime_get(), t);
+			t = ktime_add_safe(ktime_get(), t);
 		tp = &t;
 	}
 	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3f4a57c7895d..c2893af9479e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -325,6 +325,23 @@ u64 ktime_divns(const ktime_t kt, s64 div)
 }
 #endif /* BITS_PER_LONG >= 64 */
 
+/*
+ * Add two ktime values and do a safety check for overflow:
+ */
+ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
+{
+	ktime_t res = ktime_add(lhs, rhs);
+
+	/*
+	 * We use KTIME_SEC_MAX here, the maximum timeout which we can
+	 * return to user space in a timespec:
+	 */
+	if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
+		res = ktime_set(KTIME_SEC_MAX, 0);
+
+	return res;
+}
+
 /*
  * Check, whether the timer is on the callback pending list
  */
@@ -682,13 +699,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 		 */
 		orun++;
 	}
-	timer->expires = ktime_add(timer->expires, interval);
-	/*
-	 * Make sure, that the result did not wrap with a very large
-	 * interval.
-	 */
-	if (timer->expires.tv64 < 0)
-		timer->expires = ktime_set(KTIME_SEC_MAX, 0);
+	timer->expires = ktime_add_safe(timer->expires, interval);
 
 	return orun;
 }
@@ -839,7 +850,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	new_base = switch_hrtimer_base(timer, base);
 
 	if (mode == HRTIMER_MODE_REL) {
-		tim = ktime_add(tim, new_base->get_time());
+		tim = ktime_add_safe(tim, new_base->get_time());
 		/*
 		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
 		 * to signal that they simply return xtime in
@@ -848,16 +859,8 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 		 * timeouts. This will go away with the GTOD framework.
 		 */
 #ifdef CONFIG_TIME_LOW_RES
-		tim = ktime_add(tim, base->resolution);
+		tim = ktime_add_safe(tim, base->resolution);
 #endif
-		/*
-		 * Careful here: User space might have asked for a
-		 * very long sleep, so the add above might result in a
-		 * negative number, which enqueues the timer in front
-		 * of the queue.
-		 */
-		if (tim.tv64 < 0)
-			tim.tv64 = KTIME_MAX;
 	}
 	timer->expires = tim;
 
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 022c9c3cee6f..a9b04203a66d 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -767,9 +767,11 @@ common_timer_set(struct k_itimer *timr, int flags,
 	/* SIGEV_NONE timers are not queued ! See common_timer_get */
 	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
 		/* Setup correct expiry time for relative timers */
-		if (mode == HRTIMER_MODE_REL)
-			timer->expires = ktime_add(timer->expires,
-						   timer->base->get_time());
+		if (mode == HRTIMER_MODE_REL) {
+			timer->expires =
+				ktime_add_safe(timer->expires,
+					       timer->base->get_time());
+		}
 		return 0;
 	}
 
-- 
cgit 


From eada35efcb2773cf49aa26277e056122e1a3405c Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Mon, 11 Feb 2008 22:47:46 +0200
Subject: slub: kmalloc page allocator pass-through cleanup

This adds a proper function for kmalloc page allocator pass-through. While it
simplifies any code that does slab tracing code a lot, I think it's a
worthwhile cleanup in itself.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 include/linux/slub_def.h |  8 ++++++--
 mm/slub.c                | 14 ++++++--------
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5e6d3d634d5b..a849c472b845 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -188,12 +188,16 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
+static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
+{
+	return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+}
+
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
 		if (size > PAGE_SIZE / 2)
-			return (void *)__get_free_pages(flags | __GFP_COMP,
-							get_order(size));
+			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
diff --git a/mm/slub.c b/mm/slub.c
index e2989ae243b5..7870ef9d8636 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2671,8 +2671,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-							get_order(size));
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
@@ -2689,8 +2688,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-							get_order(size));
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
@@ -3219,8 +3217,8 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-							get_order(size));
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -3235,8 +3233,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-							get_order(size));
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
-- 
cgit 


From b7a49f0d4c34166ae84089d9f145cfaae1b0eec5 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Thu, 14 Feb 2008 14:21:32 -0800
Subject: slub: Determine gfpflags once and not every time a slab is allocated

Currently we determine the gfp flags to pass to the page allocator
each time a slab is being allocated.

Determine the bits to be set at the time the slab is created. Store
in a new allocflags field and add the flags in allocate_slab().

Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 include/linux/slub_def.h |  1 +
 mm/slub.c                | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a849c472b845..98be113cf935 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -71,6 +71,7 @@ struct kmem_cache {
 
 	/* Allocation and freeing of slabs */
 	int objects;		/* Number of objects in slab */
+	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
 	void (*ctor)(struct kmem_cache *, void *);
 	int inuse;		/* Offset to metadata */
diff --git a/mm/slub.c b/mm/slub.c
index 1af7f2f19420..ccfd41141b6b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1078,14 +1078,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	struct page *page;
 	int pages = 1 << s->order;
 
-	if (s->order)
-		flags |= __GFP_COMP;
-
-	if (s->flags & SLAB_CACHE_DMA)
-		flags |= SLUB_DMA;
-
-	if (s->flags & SLAB_RECLAIM_ACCOUNT)
-		flags |= __GFP_RECLAIMABLE;
+	flags |= s->allocflags;
 
 	if (node == -1)
 		page = alloc_pages(flags, s->order);
@@ -2333,6 +2326,16 @@ static int calculate_sizes(struct kmem_cache *s)
 	if (s->order < 0)
 		return 0;
 
+	s->allocflags = 0;
+	if (s->order)
+		s->allocflags |= __GFP_COMP;
+
+	if (s->flags & SLAB_CACHE_DMA)
+		s->allocflags |= SLUB_DMA;
+
+	if (s->flags & SLAB_RECLAIM_ACCOUNT)
+		s->allocflags |= __GFP_RECLAIMABLE;
+
 	/*
 	 * Determine the number of objects per slab
 	 */
-- 
cgit 


From 331dc558fa020451ff773973cee855fd721aa88e Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Thu, 14 Feb 2008 14:28:09 -0800
Subject: slub: Support 4k kmallocs again to compensate for page allocator
 slowness

Currently we hand off PAGE_SIZEd kmallocs to the page allocator in the
mistaken belief that the page allocator can handle these allocations
effectively. However, measurements indicate a minimum slowdown by the
factor of 8 (and that is only SMP, NUMA is much worse) vs the slub fastpath
which causes regressions in tbench.

Increase the number of kmalloc caches by one so that we again handle 4k
kmallocs directly from slub. 4k page buffering for the page allocator
will be performed by slub like done by slab.

At some point the page allocator fastpath should be fixed. A lot of the kernel
would benefit from a faster ability to allocate a single page. If that is
done then the 4k allocs may again be forwarded to the page allocator and this
patch could be reverted.

Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 include/linux/slub_def.h |  6 +++---
 mm/slub.c                | 18 +++++++++---------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 98be113cf935..57deecc79d52 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -111,7 +111,7 @@ struct kmem_cache {
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT];
+extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -197,7 +197,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
-		if (size > PAGE_SIZE / 2)
+		if (size > PAGE_SIZE)
 			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
@@ -219,7 +219,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) &&
-		size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) {
+		size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
 
 		if (!s)
diff --git a/mm/slub.c b/mm/slub.c
index 644fd0aaeaf1..4b3895cb90ee 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2517,11 +2517,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  *		Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2703,7 +2703,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -2720,7 +2720,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -3032,7 +3032,7 @@ void __init kmem_cache_init(void)
 		caches++;
 	}
 
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
 			"kmalloc", 1 << i, GFP_KERNEL);
 		caches++;
@@ -3059,7 +3059,7 @@ void __init kmem_cache_init(void)
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
 			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3088,7 +3088,7 @@ static int slab_unmergeable(struct kmem_cache *s)
 	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
 		return 1;
 
-	if ((s->flags & __PAGE_ALLOC_FALLBACK)
+	if ((s->flags & __PAGE_ALLOC_FALLBACK))
 		return 1;
 
 	if (s->ctor)
@@ -3252,7 +3252,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
@@ -3268,7 +3268,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
-- 
cgit 


From 1387d0d8b002c8ce90412fb2695ec6085eb8ce01 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 14 Feb 2008 19:31:20 -0800
Subject: fix module_update_markers() compile error

This patch fixes the following compile error with CONFIG_MODULES=n
caused by commit fb40bd78b0f91b274879cf5db8facd1e04b6052e:

/home/bunk/linux/kernel-2.6/git/linux-2.6/kernel/marker.c: In function `marker_update_probes':
/home/bunk/linux/kernel-2.6/git/linux-2.6/kernel/marker.c:627: error: too few arguments to function `module_update_markers'

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/module.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 330bec08c2c4..819c4e889bf1 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -567,8 +567,7 @@ static inline void print_modules(void)
 {
 }
 
-static inline void module_update_markers(struct module *probe_module,
-		int *refcount)
+static inline void module_update_markers(void)
 {
 }
 
-- 
cgit 


From 0778361837bfaaa0bcf9ee79f0edd10e3a220899 Mon Sep 17 00:00:00 2001
From: Ben Nizette <bn@niasdigital.com>
Date: Thu, 14 Feb 2008 19:31:31 -0800
Subject: Include kernel.h from configfs.h

configfs.h uses the container_of macro and as such should include kernel.h.

Signed-off-by: Ben Nizette <bn@niasdigital.com>
Cc: Joel Becker <Joel.Becker@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/configfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 8c6967f3fb11..4b287ad9371a 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -37,6 +37,7 @@
 
 #ifdef __KERNEL__
 
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/kref.h>
-- 
cgit 


From 000cb48ee18165776b5a2beb72ed18f66bc61878 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 14 Feb 2008 19:31:32 -0800
Subject: vfs: add explanation of I_DIRTY_DATASYNC bit

Add explanation of I_DIRTY_DATASYNC bit.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Joern Engel <joern@logfs.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 18cfbf76ec5b..98ffb6ead434 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1284,8 +1284,10 @@ struct super_operations {
  *
  * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
  *			fdatasync().  i_atime is the usual cause.
- * I_DIRTY_DATASYNC	Inode is dirty and must be written on fdatasync(), f.e.
- *			because i_size changed.
+ * I_DIRTY_DATASYNC	Data-related inode changes pending. We keep track of
+ *			these changes separately from I_DIRTY_SYNC so that we
+ *			don't have to write inode on fdatasync() when only
+ *			mtime has changed in it.
  * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
  * I_NEW		get_new_inode() sets i_state to I_LOCK|I_NEW.  Both
  *			are cleared by unlock_new_inode(), called from iget().
-- 
cgit 


From 429731b1553bacf9a331c260c317a28aaa878edb Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:34:31 -0800
Subject: Remove path_release_on_umount()

path_release_on_umount() should only be called from sys_umount(). I merged the
function into sys_umount() instead of having in in namei.c.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c            | 10 ----------
 fs/namespace.c        |  4 +++-
 include/linux/namei.h |  1 -
 3 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 52703986323a..3ed4d7576d6d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -368,16 +368,6 @@ void path_release(struct nameidata *nd)
 	mntput(nd->mnt);
 }
 
-/*
- * umount() mustn't call path_release()/mntput() as that would clear
- * mnt_expiry_mark
- */
-void path_release_on_umount(struct nameidata *nd)
-{
-	dput(nd->dentry);
-	mntput_no_expire(nd->mnt);
-}
-
 /**
  * release_open_intent - free up open intent resources
  * @nd: pointer to nameidata
diff --git a/fs/namespace.c b/fs/namespace.c
index 63ced21c12dc..7937d30a6732 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -690,7 +690,9 @@ asmlinkage long sys_umount(char __user * name, int flags)
 
 	retval = do_umount(nd.mnt, flags);
 dput_and_out:
-	path_release_on_umount(&nd);
+	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
+	dput(nd.dentry);
+	mntput_no_expire(nd.mnt);
 out:
 	return retval;
 }
diff --git a/include/linux/namei.h b/include/linux/namei.h
index c13e411491f4..307b1b31d37f 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -72,7 +72,6 @@ extern int path_lookup(const char *, unsigned, struct nameidata *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct nameidata *);
 extern void path_release(struct nameidata *);
-extern void path_release_on_umount(struct nameidata *);
 
 extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags);
 extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags);
-- 
cgit 


From c5e725f33b733a77de622e91b6ba5645fcf070be Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:34:31 -0800
Subject: Move struct path into its own header

Move the definition of struct path into its own header file for further
patches.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/namei.h |  6 +-----
 include/linux/path.h  | 12 ++++++++++++
 2 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/path.h

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 307b1b31d37f..1cd15dad2469 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -3,6 +3,7 @@
 
 #include <linux/dcache.h>
 #include <linux/linkage.h>
+#include <linux/path.h>
 
 struct vfsmount;
 
@@ -29,11 +30,6 @@ struct nameidata {
 	} intent;
 };
 
-struct path {
-	struct vfsmount *mnt;
-	struct dentry *dentry;
-};
-
 /*
  * Type of the last component on LOOKUP_PARENT
  */
diff --git a/include/linux/path.h b/include/linux/path.h
new file mode 100644
index 000000000000..cbebdc5c9a60
--- /dev/null
+++ b/include/linux/path.h
@@ -0,0 +1,12 @@
+#ifndef _LINUX_PATH_H
+#define _LINUX_PATH_H
+
+struct dentry;
+struct vfsmount;
+
+struct path {
+	struct vfsmount *mnt;
+	struct dentry *dentry;
+};
+
+#endif  /* _LINUX_PATH_H */
-- 
cgit 


From 4ac9137858e08a19f29feac4e1f4df7c268b0ba5 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:34:32 -0800
Subject: Embed a struct path into struct nameidata instead of nd->{dentry,mnt}

This is the central patch of a cleanup series. In most cases there is no good
reason why someone would want to use a dentry for itself. This series reflects
that fact and embeds a struct path into nameidata.

Together with the other patches of this series
- it enforced the correct order of getting/releasing the reference count on
  <dentry,vfsmount> pairs
- it prepares the VFS for stacking support since it is essential to have a
  struct path in every place where the stack can be traversed
- it reduces the overall code size:

without patch series:
   text    data     bss     dec     hex filename
5321639  858418  715768 6895825  6938d1 vmlinux

with patch series:
   text    data     bss     dec     hex filename
5320026  858418  715768 6894212  693284 vmlinux

This patch:

Switch from nd->{dentry,mnt} to nd->path.{dentry,mnt} everywhere.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix cifs]
[akpm@linux-foundation.org: fix smack]
Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/osf_sys.c               |   2 +-
 arch/mips/kernel/sysirix.c                |   6 +-
 arch/parisc/hpux/sys_hpux.c               |   2 +-
 arch/powerpc/platforms/cell/spufs/inode.c |  18 +--
 arch/sparc64/solaris/fs.c                 |   8 +-
 arch/um/drivers/mconsole_kern.c           |   6 +-
 drivers/md/dm-table.c                     |   2 +-
 drivers/mtd/mtdsuper.c                    |  10 +-
 fs/afs/mntpt.c                            |  22 +--
 fs/autofs4/root.c                         |   3 +-
 fs/block_dev.c                            |   4 +-
 fs/cifs/cifs_dfs_ref.c                    |  23 ++--
 fs/coda/pioctl.c                          |   2 +-
 fs/compat.c                               |   4 +-
 fs/configfs/symlink.c                     |   4 +-
 fs/dquot.c                                |   7 +-
 fs/ecryptfs/dentry.c                      |  12 +-
 fs/ecryptfs/inode.c                       |  24 ++--
 fs/ecryptfs/main.c                        |   4 +-
 fs/exec.c                                 |   4 +-
 fs/ext3/super.c                           |   4 +-
 fs/ext4/super.c                           |   4 +-
 fs/gfs2/ops_fstype.c                      |   5 +-
 fs/inotify_user.c                         |   2 +-
 fs/namei.c                                | 220 +++++++++++++++---------------
 fs/namespace.c                            | 185 +++++++++++++------------
 fs/nfs/namespace.c                        |  27 ++--
 fs/nfs/nfs4proc.c                         |   8 +-
 fs/nfsctl.c                               |   2 +-
 fs/nfsd/export.c                          |  35 ++---
 fs/nfsd/nfs4recover.c                     |  32 ++---
 fs/nfsd/nfs4state.c                       |   2 +-
 fs/open.c                                 |  31 +++--
 fs/proc/base.c                            |   3 +-
 fs/proc/proc_sysctl.c                     |   2 +-
 fs/reiserfs/super.c                       |   6 +-
 fs/stat.c                                 |  13 +-
 fs/utimes.c                               |   2 +-
 fs/xattr.c                                |  16 +--
 fs/xfs/linux-2.6/xfs_ioctl.c              |   6 +-
 include/linux/namei.h                     |   3 +-
 kernel/audit_tree.c                       |  16 +--
 kernel/auditfilter.c                      |  11 +-
 net/sunrpc/rpc_pipe.c                     |   5 +-
 net/unix/af_unix.c                        |  20 +--
 security/selinux/hooks.c                  |   4 +-
 security/smack/smack_lsm.c                |   2 +-
 47 files changed, 431 insertions(+), 402 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 973c5c3705e3..f2bef5e14faa 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -259,7 +259,7 @@ osf_statfs(char __user *path, struct osf_statfs __user *buffer, unsigned long bu
 
 	retval = user_path_walk(path, &nd);
 	if (!retval) {
-		retval = do_osf_statfs(nd.dentry, buffer, bufsiz);
+		retval = do_osf_statfs(nd.path.dentry, buffer, bufsiz);
 		path_release(&nd);
 	}
 	return retval;
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index d70c4e0e85fb..49d6292ffa05 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -694,7 +694,7 @@ asmlinkage int irix_statfs(const char __user *path,
 	if (error)
 		goto out;
 
-	error = vfs_statfs(nd.dentry, &kbuf);
+	error = vfs_statfs(nd.path.dentry, &kbuf);
 	if (error)
 		goto dput_and_out;
 
@@ -1360,7 +1360,7 @@ asmlinkage int irix_statvfs(char __user *fname, struct irix_statvfs __user *buf)
 	error = user_path_walk(fname, &nd);
 	if (error)
 		goto out;
-	error = vfs_statfs(nd.dentry, &kbuf);
+	error = vfs_statfs(nd.path.dentry, &kbuf);
 	if (error)
 		goto dput_and_out;
 
@@ -1611,7 +1611,7 @@ asmlinkage int irix_statvfs64(char __user *fname, struct irix_statvfs64 __user *
 	error = user_path_walk(fname, &nd);
 	if (error)
 		goto out;
-	error = vfs_statfs(nd.dentry, &kbuf);
+	error = vfs_statfs(nd.path.dentry, &kbuf);
 	if (error)
 		goto dput_and_out;
 
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 3e025df2dc86..d7395af3e846 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -219,7 +219,7 @@ asmlinkage long hpux_statfs(const char __user *path,
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct hpux_statfs tmp;
-		error = vfs_statfs_hpux(nd.dentry, &tmp);
+		error = vfs_statfs_hpux(nd.path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index e6e6559c55ed..6d1228c66c5e 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -1,3 +1,4 @@
+
 /*
  * SPU file system
  *
@@ -592,7 +593,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
 
 	ret = -EINVAL;
 	/* check if we are on spufs */
-	if (nd->dentry->d_sb->s_type != &spufs_type)
+	if (nd->path.dentry->d_sb->s_type != &spufs_type)
 		goto out;
 
 	/* don't accept undefined flags */
@@ -600,9 +601,9 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
 		goto out;
 
 	/* only threads can be underneath a gang */
-	if (nd->dentry != nd->dentry->d_sb->s_root) {
+	if (nd->path.dentry != nd->path.dentry->d_sb->s_root) {
 		if ((flags & SPU_CREATE_GANG) ||
-		    !SPUFS_I(nd->dentry->d_inode)->i_gang)
+		    !SPUFS_I(nd->path.dentry->d_inode)->i_gang)
 			goto out;
 	}
 
@@ -618,16 +619,17 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
 	mode &= ~current->fs->umask;
 
 	if (flags & SPU_CREATE_GANG)
-		return spufs_create_gang(nd->dentry->d_inode,
-					dentry, nd->mnt, mode);
+		return spufs_create_gang(nd->path.dentry->d_inode,
+					 dentry, nd->path.mnt, mode);
 	else
-		return spufs_create_context(nd->dentry->d_inode,
-					dentry, nd->mnt, flags, mode, filp);
+		return spufs_create_context(nd->path.dentry->d_inode,
+					    dentry, nd->path.mnt, flags, mode,
+					    filp);
 
 out_dput:
 	dput(dentry);
 out_dir:
-	mutex_unlock(&nd->dentry->d_inode->i_mutex);
+	mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
 out:
 	return ret;
 }
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c
index 9311bfe4f2f7..516932e9f70b 100644
--- a/arch/sparc64/solaris/fs.c
+++ b/arch/sparc64/solaris/fs.c
@@ -434,8 +434,8 @@ asmlinkage int solaris_statvfs(u32 path, u32 buf)
 
 	error = user_path_walk(A(path),&nd);
 	if (!error) {
-		struct inode * inode = nd.dentry->d_inode;
-		error = report_statvfs(nd.mnt, inode, buf);
+		struct inode *inode = nd.path.dentry->d_inode;
+		error = report_statvfs(nd.path.mnt, inode, buf);
 		path_release(&nd);
 	}
 	return error;
@@ -464,8 +464,8 @@ asmlinkage int solaris_statvfs64(u32 path, u32 buf)
 	lock_kernel();
 	error = user_path_walk(A(path), &nd);
 	if (!error) {
-		struct inode * inode = nd.dentry->d_inode;
-		error = report_statvfs64(nd.mnt, inode, buf);
+		struct inode *inode = nd.path.dentry->d_inode;
+		error = report_statvfs64(nd.path.mnt, inode, buf);
 		path_release(&nd);
 	}
 	unlock_kernel();
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index ebb265c07e4d..19d579d74d27 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -145,8 +145,8 @@ void mconsole_proc(struct mc_request *req)
 	}
 	up_write(&super->s_umount);
 
-	nd.dentry = super->s_root;
-	nd.mnt = NULL;
+	nd.path.dentry = super->s_root;
+	nd.path.mnt = NULL;
 	nd.flags = O_RDONLY + 1;
 	nd.last_type = LAST_ROOT;
 
@@ -159,7 +159,7 @@ void mconsole_proc(struct mc_request *req)
 		goto out_kill;
 	}
 
-	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+	file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
 	if (IS_ERR(file)) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		goto out_kill;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index f16062982383..b611a3c61504 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -361,7 +361,7 @@ static int lookup_device(const char *path, dev_t *dev)
 	if ((r = path_lookup(path, LOOKUP_FOLLOW, &nd)))
 		return r;
 
-	inode = nd.dentry->d_inode;
+	inode = nd.path.dentry->d_inode;
 	if (!inode) {
 		r = -ENOENT;
 		goto out;
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 9b430f20b640..e376f4517905 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -184,25 +184,25 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
 	ret = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
 
 	DEBUG(1, "MTDSB: path_lookup() returned %d, inode %p\n",
-	      ret, nd.dentry ? nd.dentry->d_inode : NULL);
+	      ret, nd.path.dentry ? nd.path.dentry->d_inode : NULL);
 
 	if (ret)
 		return ret;
 
 	ret = -EINVAL;
 
-	if (!S_ISBLK(nd.dentry->d_inode->i_mode))
+	if (!S_ISBLK(nd.path.dentry->d_inode->i_mode))
 		goto out;
 
-	if (nd.mnt->mnt_flags & MNT_NODEV) {
+	if (nd.path.mnt->mnt_flags & MNT_NODEV) {
 		ret = -EACCES;
 		goto out;
 	}
 
-	if (imajor(nd.dentry->d_inode) != MTD_BLOCK_MAJOR)
+	if (imajor(nd.path.dentry->d_inode) != MTD_BLOCK_MAJOR)
 		goto not_an_MTD_device;
 
-	mtdnr = iminor(nd.dentry->d_inode);
+	mtdnr = iminor(nd.path.dentry->d_inode);
 	path_release(&nd);
 
 	return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super,
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5ce43b63c60e..4136dfb9ffb8 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -218,14 +218,14 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	_enter("%p{%s},{%s:%p{%s},}",
 	       dentry,
 	       dentry->d_name.name,
-	       nd->mnt->mnt_devname,
+	       nd->path.mnt->mnt_devname,
 	       dentry,
-	       nd->dentry->d_name.name);
+	       nd->path.dentry->d_name.name);
 
-	dput(nd->dentry);
-	nd->dentry = dget(dentry);
+	dput(nd->path.dentry);
+	nd->path.dentry = dget(dentry);
 
-	newmnt = afs_mntpt_do_automount(nd->dentry);
+	newmnt = afs_mntpt_do_automount(nd->path.dentry);
 	if (IS_ERR(newmnt)) {
 		path_release(nd);
 		return (void *)newmnt;
@@ -235,17 +235,17 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
 	switch (err) {
 	case 0:
-		dput(nd->dentry);
-		mntput(nd->mnt);
-		nd->mnt = newmnt;
-		nd->dentry = dget(newmnt->mnt_root);
+		dput(nd->path.dentry);
+		mntput(nd->path.mnt);
+		nd->path.mnt = newmnt;
+		nd->path.dentry = dget(newmnt->mnt_root);
 		schedule_delayed_work(&afs_mntpt_expiry_timer,
 				      afs_mntpt_expiry_timeout * HZ);
 		break;
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
-		while (d_mountpoint(nd->dentry) &&
-		       follow_down(&nd->mnt, &nd->dentry))
+		while (d_mountpoint(nd->path.dentry) &&
+		       follow_down(&nd->path.mnt, &nd->path.dentry))
 			;
 		err = 0;
 	default:
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2bbcc8151dc3..a119c863ff37 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -368,7 +368,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		 * so we don't need to follow the mount.
 		 */
 		if (d_mountpoint(dentry)) {
-			if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) {
+			if (!autofs4_follow_mount(&nd->path.mnt,
+						  &nd->path.dentry)) {
 				status = -ENOENT;
 				goto out_error;
 			}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e63067d25cdb..5f4721fdbdb6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1397,12 +1397,12 @@ struct block_device *lookup_bdev(const char *path)
 	if (error)
 		return ERR_PTR(error);
 
-	inode = nd.dentry->d_inode;
+	inode = nd.path.dentry->d_inode;
 	error = -ENOTBLK;
 	if (!S_ISBLK(inode->i_mode))
 		goto fail;
 	error = -EACCES;
-	if (nd.mnt->mnt_flags & MNT_NODEV)
+	if (nd.path.mnt->mnt_flags & MNT_NODEV)
 		goto fail;
 	error = -ENOMEM;
 	bdev = bd_acquire(inode);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 413ee2349d1a..bcd53c2fe781 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -259,18 +259,18 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
 	int err;
 
 	mntget(newmnt);
-	err = do_add_mount(newmnt, nd, nd->mnt->mnt_flags, mntlist);
+	err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist);
 	switch (err) {
 	case 0:
-		dput(nd->dentry);
-		mntput(nd->mnt);
-		nd->mnt = newmnt;
-		nd->dentry = dget(newmnt->mnt_root);
+		dput(nd->path.dentry);
+		mntput(nd->path.mnt);
+		nd->path.mnt = newmnt;
+		nd->path.dentry = dget(newmnt->mnt_root);
 		break;
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
-		while (d_mountpoint(nd->dentry) &&
-		       follow_down(&nd->mnt, &nd->dentry))
+		while (d_mountpoint(nd->path.dentry) &&
+		       follow_down(&nd->path.mnt, &nd->path.dentry))
 			;
 		err = 0;
 	default:
@@ -307,8 +307,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 
 	xid = GetXid();
 
-	dput(nd->dentry);
-	nd->dentry = dget(dentry);
+	dput(nd->path.dentry);
+	nd->path.dentry = dget(dentry);
 
 	cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
 	ses = cifs_sb->tcon->ses;
@@ -340,7 +340,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 				rc = -EINVAL;
 				goto out_err;
 			}
-			mnt = cifs_dfs_do_refmount(nd->mnt, nd->dentry,
+			mnt = cifs_dfs_do_refmount(nd->path.mnt,
+						nd->path.dentry,
 						referrals[i].node_name);
 			cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p",
 					 __FUNCTION__,
@@ -357,7 +358,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	if (IS_ERR(mnt))
 		goto out_err;
 
-	nd->mnt->mnt_flags |= MNT_SHRINKABLE;
+	nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
 	rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
 
 out:
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 2bf3026adc80..3b6a1b721b46 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -75,7 +75,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
 	if ( error ) {
 		return error;
         } else {
-	        target_inode = nd.dentry->d_inode;
+		target_inode = nd.path.dentry->d_inode;
 	}
 	
 	/* return if it is not a Coda inode */
diff --git a/fs/compat.c b/fs/compat.c
index ee80ff341d37..a8d62375ada1 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -241,7 +241,7 @@ asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.dentry, &tmp);
+		error = vfs_statfs(nd.path.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_release(&nd);
@@ -309,7 +309,7 @@ asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, s
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.dentry, &tmp);
+		error = vfs_statfs(nd.path.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_release(&nd);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 22700d2857da..cda3ea001ae6 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -99,8 +99,8 @@ static int get_target(const char *symname, struct nameidata *nd,
 
 	ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd);
 	if (!ret) {
-		if (nd->dentry->d_sb == configfs_sb) {
-			*target = configfs_get_config_item(nd->dentry);
+		if (nd->path.dentry->d_sb == configfs_sb) {
+			*target = configfs_get_config_item(nd->path.dentry);
 			if (!*target) {
 				ret = -ENOENT;
 				path_release(nd);
diff --git a/fs/dquot.c b/fs/dquot.c
index def4e969df77..289f48d2c727 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1633,14 +1633,15 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
 	error = path_lookup(path, LOOKUP_FOLLOW, &nd);
 	if (error < 0)
 		return error;
-	error = security_quota_on(nd.dentry);
+	error = security_quota_on(nd.path.dentry);
 	if (error)
 		goto out_path;
 	/* Quota file not on the same filesystem? */
-	if (nd.mnt->mnt_sb != sb)
+	if (nd.path.mnt->mnt_sb != sb)
 		error = -EXDEV;
 	else
-		error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
+		error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
+					   format_id);
 out_path:
 	path_release(&nd);
 	return error;
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index cb20b964419f..841a032050a7 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -51,13 +51,13 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
 		goto out;
-	dentry_save = nd->dentry;
-	vfsmount_save = nd->mnt;
-	nd->dentry = lower_dentry;
-	nd->mnt = lower_mnt;
+	dentry_save = nd->path.dentry;
+	vfsmount_save = nd->path.mnt;
+	nd->path.dentry = lower_dentry;
+	nd->path.mnt = lower_mnt;
 	rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
-	nd->dentry = dentry_save;
-	nd->mnt = vfsmount_save;
+	nd->path.dentry = dentry_save;
+	nd->path.mnt = vfsmount_save;
 	if (dentry->d_inode) {
 		struct inode *lower_inode =
 			ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index edd1e44e9d47..e23861152101 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -77,13 +77,13 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
 	struct vfsmount *vfsmount_save;
 	int rc;
 
-	dentry_save = nd->dentry;
-	vfsmount_save = nd->mnt;
-	nd->dentry = lower_dentry;
-	nd->mnt = lower_mnt;
+	dentry_save = nd->path.dentry;
+	vfsmount_save = nd->path.mnt;
+	nd->path.dentry = lower_dentry;
+	nd->path.mnt = lower_mnt;
 	rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
-	nd->dentry = dentry_save;
-	nd->mnt = vfsmount_save;
+	nd->path.dentry = dentry_save;
+	nd->path.mnt = vfsmount_save;
 	return rc;
 }
 
@@ -819,14 +819,14 @@ ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	int rc;
 
         if (nd) {
-		struct vfsmount *vfsmnt_save = nd->mnt;
-		struct dentry *dentry_save = nd->dentry;
+		struct vfsmount *vfsmnt_save = nd->path.mnt;
+		struct dentry *dentry_save = nd->path.dentry;
 
-		nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry);
-		nd->dentry = ecryptfs_dentry_to_lower(nd->dentry);
+		nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry);
+		nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry);
 		rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
-		nd->mnt = vfsmnt_save;
-		nd->dentry = dentry_save;
+		nd->path.mnt = vfsmnt_save;
+		nd->path.dentry = dentry_save;
         } else
 		rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
         return rc;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 778c420e4cac..a70555a6472c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -513,8 +513,8 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
 		ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n");
 		goto out;
 	}
-	lower_root = nd.dentry;
-	lower_mnt = nd.mnt;
+	lower_root = nd.path.dentry;
+	lower_mnt = nd.path.mnt;
 	ecryptfs_set_superblock_lower(sb, lower_root->d_sb);
 	sb->s_maxbytes = lower_root->d_sb->s_maxbytes;
 	sb->s_blocksize = lower_root->d_sb->s_blocksize;
diff --git a/fs/exec.c b/fs/exec.c
index 9ff6069094d8..7a12d2d1ac11 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -112,7 +112,7 @@ asmlinkage long sys_uselib(const char __user * library)
 		goto out;
 
 	error = -EINVAL;
-	if (!S_ISREG(nd.dentry->d_inode->i_mode))
+	if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
 		goto exit;
 
 	error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
@@ -652,7 +652,7 @@ struct file *open_exec(const char *name)
 	file = ERR_PTR(err);
 
 	if (!err) {
-		struct inode *inode = nd.dentry->d_inode;
+		struct inode *inode = nd.path.dentry->d_inode;
 		file = ERR_PTR(-EACCES);
 		if (S_ISREG(inode->i_mode)) {
 			int err = vfs_permission(&nd, MAY_EXEC);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8e02cbfb1123..0b5057e0dc1e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2758,12 +2758,12 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 	if (err)
 		return err;
 	/* Quotafile not on the same filesystem? */
-	if (nd.mnt->mnt_sb != sb) {
+	if (nd.path.mnt->mnt_sb != sb) {
 		path_release(&nd);
 		return -EXDEV;
 	}
 	/* Quotafile not of fs root? */
-	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
+	if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
 		printk(KERN_WARNING
 			"EXT3-fs: Quota file not on filesystem root. "
 			"Journalled quota will not work.\n");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0072da75221f..37117990073d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3158,12 +3158,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 	if (err)
 		return err;
 	/* Quotafile not on the same filesystem? */
-	if (nd.mnt->mnt_sb != sb) {
+	if (nd.path.mnt->mnt_sb != sb) {
 		path_release(&nd);
 		return -EXDEV;
 	}
 	/* Quotafile not of fs root? */
-	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
+	if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
 		printk(KERN_WARNING
 			"EXT4-fs: Quota file not on filesystem root. "
 			"Journalled quota will not work.\n");
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 43d511bba52d..f4ced7fcda82 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -884,12 +884,13 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
 		       dev_name);
 		goto out;
 	}
-	error = vfs_getattr(nd.mnt, nd.dentry, &stat);
+	error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat);
 
 	fstype = get_fs_type("gfs2");
 	list_for_each_entry(s, &fstype->fs_supers, s_instances) {
 		if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
-		    (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
+		    (S_ISDIR(stat.mode) &&
+		     s == nd.path.dentry->d_inode->i_sb)) {
 			sb = s;
 			goto free_nd;
 		}
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 9ef4d212c507..e9c58652533a 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -667,7 +667,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 		goto fput_and_out;
 
 	/* inode held in place by reference to nd; dev by fget on fd */
-	inode = nd.dentry->d_inode;
+	inode = nd.path.dentry->d_inode;
 	dev = filp->private_data;
 
 	mutex_lock(&dev->up_mutex);
diff --git a/fs/namei.c b/fs/namei.c
index 3ed4d7576d6d..c9b05a71c39c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -231,7 +231,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct vfsmount *mnt = NULL;
 
 	if (nd)
-		mnt = nd->mnt;
+		mnt = nd->path.mnt;
 
 	if (mask & MAY_WRITE) {
 		umode_t mode = inode->i_mode;
@@ -296,7 +296,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
  */
 int vfs_permission(struct nameidata *nd, int mask)
 {
-	return permission(nd->dentry->d_inode, mask, nd);
+	return permission(nd->path.dentry->d_inode, mask, nd);
 }
 
 /**
@@ -364,8 +364,8 @@ int deny_write_access(struct file * file)
 
 void path_release(struct nameidata *nd)
 {
-	dput(nd->dentry);
-	mntput(nd->mnt);
+	dput(nd->path.dentry);
+	mntput(nd->path.mnt);
 }
 
 /**
@@ -530,15 +530,15 @@ walk_init_root(const char *name, struct nameidata *nd)
 
 	read_lock(&fs->lock);
 	if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-		nd->mnt = mntget(fs->altrootmnt);
-		nd->dentry = dget(fs->altroot);
+		nd->path.mnt = mntget(fs->altrootmnt);
+		nd->path.dentry = dget(fs->altroot);
 		read_unlock(&fs->lock);
 		if (__emul_lookup_dentry(name,nd))
 			return 0;
 		read_lock(&fs->lock);
 	}
-	nd->mnt = mntget(fs->rootmnt);
-	nd->dentry = dget(fs->root);
+	nd->path.mnt = mntget(fs->rootmnt);
+	nd->path.dentry = dget(fs->root);
 	read_unlock(&fs->lock);
 	return 1;
 }
@@ -581,17 +581,17 @@ fail:
 static inline void dput_path(struct path *path, struct nameidata *nd)
 {
 	dput(path->dentry);
-	if (path->mnt != nd->mnt)
+	if (path->mnt != nd->path.mnt)
 		mntput(path->mnt);
 }
 
 static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
 {
-	dput(nd->dentry);
-	if (nd->mnt != path->mnt)
-		mntput(nd->mnt);
-	nd->mnt = path->mnt;
-	nd->dentry = path->dentry;
+	dput(nd->path.dentry);
+	if (nd->path.mnt != path->mnt)
+		mntput(nd->path.mnt);
+	nd->path.mnt = path->mnt;
+	nd->path.dentry = path->dentry;
 }
 
 static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
@@ -603,7 +603,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
 	touch_atime(path->mnt, dentry);
 	nd_set_link(nd, NULL);
 
-	if (path->mnt != nd->mnt) {
+	if (path->mnt != nd->path.mnt) {
 		path_to_nameidata(path, nd);
 		dget(dentry);
 	}
@@ -733,37 +733,37 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
 
 	while(1) {
 		struct vfsmount *parent;
-		struct dentry *old = nd->dentry;
+		struct dentry *old = nd->path.dentry;
 
                 read_lock(&fs->lock);
-		if (nd->dentry == fs->root &&
-		    nd->mnt == fs->rootmnt) {
+		if (nd->path.dentry == fs->root &&
+		    nd->path.mnt == fs->rootmnt) {
                         read_unlock(&fs->lock);
 			break;
 		}
                 read_unlock(&fs->lock);
 		spin_lock(&dcache_lock);
-		if (nd->dentry != nd->mnt->mnt_root) {
-			nd->dentry = dget(nd->dentry->d_parent);
+		if (nd->path.dentry != nd->path.mnt->mnt_root) {
+			nd->path.dentry = dget(nd->path.dentry->d_parent);
 			spin_unlock(&dcache_lock);
 			dput(old);
 			break;
 		}
 		spin_unlock(&dcache_lock);
 		spin_lock(&vfsmount_lock);
-		parent = nd->mnt->mnt_parent;
-		if (parent == nd->mnt) {
+		parent = nd->path.mnt->mnt_parent;
+		if (parent == nd->path.mnt) {
 			spin_unlock(&vfsmount_lock);
 			break;
 		}
 		mntget(parent);
-		nd->dentry = dget(nd->mnt->mnt_mountpoint);
+		nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
 		spin_unlock(&vfsmount_lock);
 		dput(old);
-		mntput(nd->mnt);
-		nd->mnt = parent;
+		mntput(nd->path.mnt);
+		nd->path.mnt = parent;
 	}
-	follow_mount(&nd->mnt, &nd->dentry);
+	follow_mount(&nd->path.mnt, &nd->path.dentry);
 }
 
 /*
@@ -774,8 +774,8 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
 static int do_lookup(struct nameidata *nd, struct qstr *name,
 		     struct path *path)
 {
-	struct vfsmount *mnt = nd->mnt;
-	struct dentry *dentry = __d_lookup(nd->dentry, name);
+	struct vfsmount *mnt = nd->path.mnt;
+	struct dentry *dentry = __d_lookup(nd->path.dentry, name);
 
 	if (!dentry)
 		goto need_lookup;
@@ -788,7 +788,7 @@ done:
 	return 0;
 
 need_lookup:
-	dentry = real_lookup(nd->dentry, name, nd);
+	dentry = real_lookup(nd->path.dentry, name, nd);
 	if (IS_ERR(dentry))
 		goto fail;
 	goto done;
@@ -825,7 +825,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 	if (!*name)
 		goto return_reval;
 
-	inode = nd->dentry->d_inode;
+	inode = nd->path.dentry->d_inode;
 	if (nd->depth)
 		lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
 
@@ -873,7 +873,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 				if (this.name[1] != '.')
 					break;
 				follow_dotdot(nd);
-				inode = nd->dentry->d_inode;
+				inode = nd->path.dentry->d_inode;
 				/* fallthrough */
 			case 1:
 				continue;
@@ -882,8 +882,9 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 		 * See if the low-level filesystem might want
 		 * to use its own hash..
 		 */
-		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
-			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
+		if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
+			err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
+							    &this);
 			if (err < 0)
 				break;
 		}
@@ -905,7 +906,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 			if (err)
 				goto return_err;
 			err = -ENOENT;
-			inode = nd->dentry->d_inode;
+			inode = nd->path.dentry->d_inode;
 			if (!inode)
 				break;
 			err = -ENOTDIR; 
@@ -933,13 +934,14 @@ last_component:
 				if (this.name[1] != '.')
 					break;
 				follow_dotdot(nd);
-				inode = nd->dentry->d_inode;
+				inode = nd->path.dentry->d_inode;
 				/* fallthrough */
 			case 1:
 				goto return_reval;
 		}
-		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
-			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
+		if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
+			err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
+							    &this);
 			if (err < 0)
 				break;
 		}
@@ -952,7 +954,7 @@ last_component:
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
-			inode = nd->dentry->d_inode;
+			inode = nd->path.dentry->d_inode;
 		} else
 			path_to_nameidata(&next, nd);
 		err = -ENOENT;
@@ -980,11 +982,12 @@ return_reval:
 		 * We bypassed the ordinary revalidation routines.
 		 * We may need to check the cached dentry for staleness.
 		 */
-		if (nd->dentry && nd->dentry->d_sb &&
-		    (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
+		if (nd->path.dentry && nd->path.dentry->d_sb &&
+		    (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
 			err = -ESTALE;
 			/* Note: we do not d_invalidate() */
-			if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
+			if (!nd->path.dentry->d_op->d_revalidate(
+					nd->path.dentry, nd))
 				break;
 		}
 return_base:
@@ -1011,20 +1014,20 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 	int result;
 
 	/* make sure the stuff we saved doesn't go away */
-	dget(save.dentry);
-	mntget(save.mnt);
+	dget(save.path.dentry);
+	mntget(save.path.mnt);
 
 	result = __link_path_walk(name, nd);
 	if (result == -ESTALE) {
 		*nd = save;
-		dget(nd->dentry);
-		mntget(nd->mnt);
+		dget(nd->path.dentry);
+		mntget(nd->path.mnt);
 		nd->flags |= LOOKUP_REVAL;
 		result = __link_path_walk(name, nd);
 	}
 
-	dput(save.dentry);
-	mntput(save.mnt);
+	dput(save.path.dentry);
+	mntput(save.path.mnt);
 
 	return result;
 }
@@ -1044,9 +1047,10 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 	if (path_walk(name, nd))
 		return 0;		/* something went wrong... */
 
-	if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
-		struct dentry *old_dentry = nd->dentry;
-		struct vfsmount *old_mnt = nd->mnt;
+	if (!nd->path.dentry->d_inode ||
+	    S_ISDIR(nd->path.dentry->d_inode->i_mode)) {
+		struct dentry *old_dentry = nd->path.dentry;
+		struct vfsmount *old_mnt = nd->path.mnt;
 		struct qstr last = nd->last;
 		int last_type = nd->last_type;
 		struct fs_struct *fs = current->fs;
@@ -1057,19 +1061,19 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 		 */
 		nd->last_type = LAST_ROOT;
 		read_lock(&fs->lock);
-		nd->mnt = mntget(fs->rootmnt);
-		nd->dentry = dget(fs->root);
+		nd->path.mnt = mntget(fs->rootmnt);
+		nd->path.dentry = dget(fs->root);
 		read_unlock(&fs->lock);
 		if (path_walk(name, nd) == 0) {
-			if (nd->dentry->d_inode) {
+			if (nd->path.dentry->d_inode) {
 				dput(old_dentry);
 				mntput(old_mnt);
 				return 1;
 			}
 			path_release(nd);
 		}
-		nd->dentry = old_dentry;
-		nd->mnt = old_mnt;
+		nd->path.dentry = old_dentry;
+		nd->path.mnt = old_mnt;
 		nd->last = last;
 		nd->last_type = last_type;
 	}
@@ -1089,8 +1093,8 @@ void set_fs_altroot(void)
 		goto set_it;
 	err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
 	if (!err) {
-		mnt = nd.mnt;
-		dentry = nd.dentry;
+		mnt = nd.path.mnt;
+		dentry = nd.path.dentry;
 	}
 set_it:
 	write_lock(&fs->lock);
@@ -1121,20 +1125,20 @@ static int do_path_lookup(int dfd, const char *name,
 	if (*name=='/') {
 		read_lock(&fs->lock);
 		if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-			nd->mnt = mntget(fs->altrootmnt);
-			nd->dentry = dget(fs->altroot);
+			nd->path.mnt = mntget(fs->altrootmnt);
+			nd->path.dentry = dget(fs->altroot);
 			read_unlock(&fs->lock);
 			if (__emul_lookup_dentry(name,nd))
 				goto out; /* found in altroot */
 			read_lock(&fs->lock);
 		}
-		nd->mnt = mntget(fs->rootmnt);
-		nd->dentry = dget(fs->root);
+		nd->path.mnt = mntget(fs->rootmnt);
+		nd->path.dentry = dget(fs->root);
 		read_unlock(&fs->lock);
 	} else if (dfd == AT_FDCWD) {
 		read_lock(&fs->lock);
-		nd->mnt = mntget(fs->pwdmnt);
-		nd->dentry = dget(fs->pwd);
+		nd->path.mnt = mntget(fs->pwdmnt);
+		nd->path.dentry = dget(fs->pwd);
 		read_unlock(&fs->lock);
 	} else {
 		struct dentry *dentry;
@@ -1154,17 +1158,17 @@ static int do_path_lookup(int dfd, const char *name,
 		if (retval)
 			goto fput_fail;
 
-		nd->mnt = mntget(file->f_path.mnt);
-		nd->dentry = dget(dentry);
+		nd->path.mnt = mntget(file->f_path.mnt);
+		nd->path.dentry = dget(dentry);
 
 		fput_light(file, fput_needed);
 	}
 
 	retval = path_walk(name, nd);
 out:
-	if (unlikely(!retval && !audit_dummy_context() && nd->dentry &&
-				nd->dentry->d_inode))
-		audit_inode(name, nd->dentry);
+	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
+				nd->path.dentry->d_inode))
+		audit_inode(name, nd->path.dentry);
 out_fail:
 	return retval;
 
@@ -1198,13 +1202,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 	nd->flags = flags;
 	nd->depth = 0;
 
-	nd->mnt = mntget(mnt);
-	nd->dentry = dget(dentry);
+	nd->path.mnt = mntget(mnt);
+	nd->path.dentry = dget(dentry);
 
 	retval = path_walk(name, nd);
-	if (unlikely(!retval && !audit_dummy_context() && nd->dentry &&
-				nd->dentry->d_inode))
-		audit_inode(name, nd->dentry);
+	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
+				nd->path.dentry->d_inode))
+		audit_inode(name, nd->path.dentry);
 
 	return retval;
 
@@ -1323,10 +1327,10 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 {
 	int err;
 
-	err = permission(nd->dentry->d_inode, MAY_EXEC, nd);
+	err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd);
 	if (err)
 		return ERR_PTR(err);
-	return __lookup_hash(&nd->last, nd->dentry, nd);
+	return __lookup_hash(&nd->last, nd->path.dentry, nd);
 }
 
 static int __lookup_one_len(const char *name, struct qstr *this,
@@ -1585,7 +1589,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 int may_open(struct nameidata *nd, int acc_mode, int flag)
 {
-	struct dentry *dentry = nd->dentry;
+	struct dentry *dentry = nd->path.dentry;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -1606,7 +1610,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 	    	flag &= ~O_TRUNC;
 	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
-		if (nd->mnt->mnt_flags & MNT_NODEV)
+		if (nd->path.mnt->mnt_flags & MNT_NODEV)
 			return -EACCES;
 
 		flag &= ~O_TRUNC;
@@ -1668,14 +1672,14 @@ static int open_namei_create(struct nameidata *nd, struct path *path,
 				int flag, int mode)
 {
 	int error;
-	struct dentry *dir = nd->dentry;
+	struct dentry *dir = nd->path.dentry;
 
 	if (!IS_POSIXACL(dir->d_inode))
 		mode &= ~current->fs->umask;
 	error = vfs_create(dir->d_inode, path->dentry, mode, nd);
 	mutex_unlock(&dir->d_inode->i_mutex);
-	dput(nd->dentry);
-	nd->dentry = path->dentry;
+	dput(nd->path.dentry);
+	nd->path.dentry = path->dentry;
 	if (error)
 		return error;
 	/* Don't check for write permission, don't truncate */
@@ -1742,11 +1746,11 @@ int open_namei(int dfd, const char *pathname, int flag,
 	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
 		goto exit;
 
-	dir = nd->dentry;
+	dir = nd->path.dentry;
 	nd->flags &= ~LOOKUP_PARENT;
 	mutex_lock(&dir->d_inode->i_mutex);
 	path.dentry = lookup_hash(nd);
-	path.mnt = nd->mnt;
+	path.mnt = nd->path.mnt;
 
 do_last:
 	error = PTR_ERR(path.dentry);
@@ -1851,10 +1855,10 @@ do_link:
 		__putname(nd->last.name);
 		goto exit;
 	}
-	dir = nd->dentry;
+	dir = nd->path.dentry;
 	mutex_lock(&dir->d_inode->i_mutex);
 	path.dentry = lookup_hash(nd);
-	path.mnt = nd->mnt;
+	path.mnt = nd->path.mnt;
 	__putname(nd->last.name);
 	goto do_last;
 }
@@ -1867,13 +1871,13 @@ do_link:
  * Simple function to lookup and return a dentry and create it
  * if it doesn't exist.  Is SMP-safe.
  *
- * Returns with nd->dentry->d_inode->i_mutex locked.
+ * Returns with nd->path.dentry->d_inode->i_mutex locked.
  */
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 
-	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	/*
 	 * Yucky last component or no last component at all?
 	 * (foo/., foo/.., /////)
@@ -1952,19 +1956,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 	dentry = lookup_create(&nd, 0);
 	error = PTR_ERR(dentry);
 
-	if (!IS_POSIXACL(nd.dentry->d_inode))
+	if (!IS_POSIXACL(nd.path.dentry->d_inode))
 		mode &= ~current->fs->umask;
 	if (!IS_ERR(dentry)) {
 		switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
-			error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
+			error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
 			break;
 		case S_IFCHR: case S_IFBLK:
-			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
+			error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,
 					new_decode_dev(dev));
 			break;
 		case S_IFIFO: case S_IFSOCK:
-			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
+			error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
 			break;
 		case S_IFDIR:
 			error = -EPERM;
@@ -1974,7 +1978,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 		}
 		dput(dentry);
 	}
-	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_release(&nd);
 out:
 	putname(tmp);
@@ -2029,12 +2033,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
 	if (IS_ERR(dentry))
 		goto out_unlock;
 
-	if (!IS_POSIXACL(nd.dentry->d_inode))
+	if (!IS_POSIXACL(nd.path.dentry->d_inode))
 		mode &= ~current->fs->umask;
-	error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+	error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
 	dput(dentry);
 out_unlock:
-	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_release(&nd);
 out:
 	putname(tmp);
@@ -2133,15 +2137,15 @@ static long do_rmdir(int dfd, const char __user *pathname)
 			error = -EBUSY;
 			goto exit1;
 	}
-	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
 		goto exit2;
-	error = vfs_rmdir(nd.dentry->d_inode, dentry);
+	error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
 	dput(dentry);
 exit2:
-	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 exit1:
 	path_release(&nd);
 exit:
@@ -2209,7 +2213,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	error = -EISDIR;
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
-	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
@@ -2219,11 +2223,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 		inode = dentry->d_inode;
 		if (inode)
 			atomic_inc(&inode->i_count);
-		error = vfs_unlink(nd.dentry->d_inode, dentry);
+		error = vfs_unlink(nd.path.dentry->d_inode, dentry);
 	exit2:
 		dput(dentry);
 	}
-	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	if (inode)
 		iput(inode);	/* truncate the inode here */
 exit1:
@@ -2300,10 +2304,10 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
 	if (IS_ERR(dentry))
 		goto out_unlock;
 
-	error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
+	error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
 	dput(dentry);
 out_unlock:
-	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_release(&nd);
 out:
 	putname(to);
@@ -2389,16 +2393,16 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 	if (error)
 		goto out;
 	error = -EXDEV;
-	if (old_nd.mnt != nd.mnt)
+	if (old_nd.path.mnt != nd.path.mnt)
 		goto out_release;
 	new_dentry = lookup_create(&nd, 0);
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
 		goto out_unlock;
-	error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+	error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
 	dput(new_dentry);
 out_unlock:
-	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 out_release:
 	path_release(&nd);
 out:
@@ -2578,15 +2582,15 @@ static int do_rename(int olddfd, const char *oldname,
 		goto exit1;
 
 	error = -EXDEV;
-	if (oldnd.mnt != newnd.mnt)
+	if (oldnd.path.mnt != newnd.path.mnt)
 		goto exit2;
 
-	old_dir = oldnd.dentry;
+	old_dir = oldnd.path.dentry;
 	error = -EBUSY;
 	if (oldnd.last_type != LAST_NORM)
 		goto exit2;
 
-	new_dir = newnd.dentry;
+	new_dir = newnd.path.dentry;
 	if (newnd.last_type != LAST_NORM)
 		goto exit2;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 7937d30a6732..5d9fd4c6d1f5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -157,13 +157,13 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
 
 static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
 {
-	old_nd->dentry = mnt->mnt_mountpoint;
-	old_nd->mnt = mnt->mnt_parent;
+	old_nd->path.dentry = mnt->mnt_mountpoint;
+	old_nd->path.mnt = mnt->mnt_parent;
 	mnt->mnt_parent = mnt;
 	mnt->mnt_mountpoint = mnt->mnt_root;
 	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
-	old_nd->dentry->d_mounted--;
+	old_nd->path.dentry->d_mounted--;
 }
 
 void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
@@ -176,10 +176,10 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
 
 static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
 {
-	mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
+	mnt_set_mountpoint(nd->path.mnt, nd->path.dentry, mnt);
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
-			hash(nd->mnt, nd->dentry));
-	list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+			hash(nd->path.mnt, nd->path.dentry));
+	list_add_tail(&mnt->mnt_child, &nd->path.mnt->mnt_mounts);
 }
 
 /*
@@ -679,20 +679,20 @@ asmlinkage long sys_umount(char __user * name, int flags)
 	if (retval)
 		goto out;
 	retval = -EINVAL;
-	if (nd.dentry != nd.mnt->mnt_root)
+	if (nd.path.dentry != nd.path.mnt->mnt_root)
 		goto dput_and_out;
-	if (!check_mnt(nd.mnt))
+	if (!check_mnt(nd.path.mnt))
 		goto dput_and_out;
 
 	retval = -EPERM;
 	if (!capable(CAP_SYS_ADMIN))
 		goto dput_and_out;
 
-	retval = do_umount(nd.mnt, flags);
+	retval = do_umount(nd.path.mnt, flags);
 dput_and_out:
 	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
-	dput(nd.dentry);
-	mntput_no_expire(nd.mnt);
+	dput(nd.path.dentry);
+	mntput_no_expire(nd.path.mnt);
 out:
 	return retval;
 }
@@ -715,10 +715,10 @@ static int mount_is_safe(struct nameidata *nd)
 		return 0;
 	return -EPERM;
 #ifdef notyet
-	if (S_ISLNK(nd->dentry->d_inode->i_mode))
+	if (S_ISLNK(nd->path.dentry->d_inode->i_mode))
 		return -EPERM;
-	if (nd->dentry->d_inode->i_mode & S_ISVTX) {
-		if (current->uid != nd->dentry->d_inode->i_uid)
+	if (nd->path.dentry->d_inode->i_mode & S_ISVTX) {
+		if (current->uid != nd->path.dentry->d_inode->i_uid)
 			return -EPERM;
 	}
 	if (vfs_permission(nd, MAY_WRITE))
@@ -767,8 +767,8 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
 				q = q->mnt_parent;
 			}
 			p = s;
-			nd.mnt = q;
-			nd.dentry = p->mnt_mountpoint;
+			nd.path.mnt = q;
+			nd.path.dentry = p->mnt_mountpoint;
 			q = clone_mnt(p, p->mnt_root, flag);
 			if (!q)
 				goto Enomem;
@@ -877,8 +877,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 			struct nameidata *nd, struct nameidata *parent_nd)
 {
 	LIST_HEAD(tree_list);
-	struct vfsmount *dest_mnt = nd->mnt;
-	struct dentry *dest_dentry = nd->dentry;
+	struct vfsmount *dest_mnt = nd->path.mnt;
+	struct dentry *dest_dentry = nd->path.dentry;
 	struct vfsmount *child, *p;
 
 	if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
@@ -913,13 +913,13 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
 	if (mnt->mnt_sb->s_flags & MS_NOUSER)
 		return -EINVAL;
 
-	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
+	if (S_ISDIR(nd->path.dentry->d_inode->i_mode) !=
 	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
 		return -ENOTDIR;
 
 	err = -ENOENT;
-	mutex_lock(&nd->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(nd->dentry->d_inode))
+	mutex_lock(&nd->path.dentry->d_inode->i_mutex);
+	if (IS_DEADDIR(nd->path.dentry->d_inode))
 		goto out_unlock;
 
 	err = security_sb_check_sb(mnt, nd);
@@ -927,10 +927,10 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
 		goto out_unlock;
 
 	err = -ENOENT;
-	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
+	if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry))
 		err = attach_recursive_mnt(mnt, nd, NULL);
 out_unlock:
-	mutex_unlock(&nd->dentry->d_inode->i_mutex);
+	mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
 	if (!err)
 		security_sb_post_addmount(mnt, nd);
 	return err;
@@ -942,14 +942,14 @@ out_unlock:
  */
 static noinline int do_change_type(struct nameidata *nd, int flag)
 {
-	struct vfsmount *m, *mnt = nd->mnt;
+	struct vfsmount *m, *mnt = nd->path.mnt;
 	int recurse = flag & MS_REC;
 	int type = flag & ~MS_REC;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (nd->dentry != nd->mnt->mnt_root)
+	if (nd->path.dentry != nd->path.mnt->mnt_root)
 		return -EINVAL;
 
 	down_write(&namespace_sem);
@@ -981,17 +981,17 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name,
 
 	down_write(&namespace_sem);
 	err = -EINVAL;
-	if (IS_MNT_UNBINDABLE(old_nd.mnt))
- 		goto out;
+	if (IS_MNT_UNBINDABLE(old_nd.path.mnt))
+		goto out;
 
-	if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+	if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt))
 		goto out;
 
 	err = -ENOMEM;
 	if (recurse)
-		mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0);
+		mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0);
 	else
-		mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0);
+		mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0);
 
 	if (!mnt)
 		goto out;
@@ -1021,24 +1021,24 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
 		      void *data)
 {
 	int err;
-	struct super_block *sb = nd->mnt->mnt_sb;
+	struct super_block *sb = nd->path.mnt->mnt_sb;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!check_mnt(nd->mnt))
+	if (!check_mnt(nd->path.mnt))
 		return -EINVAL;
 
-	if (nd->dentry != nd->mnt->mnt_root)
+	if (nd->path.dentry != nd->path.mnt->mnt_root)
 		return -EINVAL;
 
 	down_write(&sb->s_umount);
 	err = do_remount_sb(sb, flags, data, 0);
 	if (!err)
-		nd->mnt->mnt_flags = mnt_flags;
+		nd->path.mnt->mnt_flags = mnt_flags;
 	up_write(&sb->s_umount);
 	if (!err)
-		security_sb_post_remount(nd->mnt, flags, data);
+		security_sb_post_remount(nd->path.mnt, flags, data);
 	return err;
 }
 
@@ -1069,56 +1069,60 @@ static noinline int do_move_mount(struct nameidata *nd, char *old_name)
 		return err;
 
 	down_write(&namespace_sem);
-	while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
+	while (d_mountpoint(nd->path.dentry) &&
+	       follow_down(&nd->path.mnt, &nd->path.dentry))
 		;
 	err = -EINVAL;
-	if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
+	if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt))
 		goto out;
 
 	err = -ENOENT;
-	mutex_lock(&nd->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(nd->dentry->d_inode))
+	mutex_lock(&nd->path.dentry->d_inode->i_mutex);
+	if (IS_DEADDIR(nd->path.dentry->d_inode))
 		goto out1;
 
-	if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
+	if (!IS_ROOT(nd->path.dentry) && d_unhashed(nd->path.dentry))
 		goto out1;
 
 	err = -EINVAL;
-	if (old_nd.dentry != old_nd.mnt->mnt_root)
+	if (old_nd.path.dentry != old_nd.path.mnt->mnt_root)
 		goto out1;
 
-	if (old_nd.mnt == old_nd.mnt->mnt_parent)
+	if (old_nd.path.mnt == old_nd.path.mnt->mnt_parent)
 		goto out1;
 
-	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
-	      S_ISDIR(old_nd.dentry->d_inode->i_mode))
+	if (S_ISDIR(nd->path.dentry->d_inode->i_mode) !=
+	      S_ISDIR(old_nd.path.dentry->d_inode->i_mode))
 		goto out1;
 	/*
 	 * Don't move a mount residing in a shared parent.
 	 */
-	if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent))
+	if (old_nd.path.mnt->mnt_parent &&
+	    IS_MNT_SHARED(old_nd.path.mnt->mnt_parent))
 		goto out1;
 	/*
 	 * Don't move a mount tree containing unbindable mounts to a destination
 	 * mount which is shared.
 	 */
-	if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt))
+	if (IS_MNT_SHARED(nd->path.mnt) &&
+	    tree_contains_unbindable(old_nd.path.mnt))
 		goto out1;
 	err = -ELOOP;
-	for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
-		if (p == old_nd.mnt)
+	for (p = nd->path.mnt; p->mnt_parent != p; p = p->mnt_parent)
+		if (p == old_nd.path.mnt)
 			goto out1;
 
-	if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd)))
+	err = attach_recursive_mnt(old_nd.path.mnt, nd, &parent_nd);
+	if (err)
 		goto out1;
 
 	spin_lock(&vfsmount_lock);
 	/* if the mount is moved, it should no longer be expire
 	 * automatically */
-	list_del_init(&old_nd.mnt->mnt_expire);
+	list_del_init(&old_nd.path.mnt->mnt_expire);
 	spin_unlock(&vfsmount_lock);
 out1:
-	mutex_unlock(&nd->dentry->d_inode->i_mutex);
+	mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
 out:
 	up_write(&namespace_sem);
 	if (!err)
@@ -1162,16 +1166,17 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
 
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
-	while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
+	while (d_mountpoint(nd->path.dentry) &&
+	       follow_down(&nd->path.mnt, &nd->path.dentry))
 		;
 	err = -EINVAL;
-	if (!check_mnt(nd->mnt))
+	if (!check_mnt(nd->path.mnt))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
 	err = -EBUSY;
-	if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
-	    nd->mnt->mnt_root == nd->dentry)
+	if (nd->path.mnt->mnt_sb == newmnt->mnt_sb &&
+	    nd->path.mnt->mnt_root == nd->path.dentry)
 		goto unlock;
 
 	err = -EINVAL;
@@ -1697,12 +1702,14 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
 		if (fs) {
 			atomic_inc(&fs->count);
 			task_unlock(p);
-			if (fs->root == old_nd->dentry
-			    && fs->rootmnt == old_nd->mnt)
-				set_fs_root(fs, new_nd->mnt, new_nd->dentry);
-			if (fs->pwd == old_nd->dentry
-			    && fs->pwdmnt == old_nd->mnt)
-				set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
+			if (fs->root == old_nd->path.dentry
+			    && fs->rootmnt == old_nd->path.mnt)
+				set_fs_root(fs, new_nd->path.mnt,
+					    new_nd->path.dentry);
+			if (fs->pwd == old_nd->path.dentry
+			    && fs->pwdmnt == old_nd->path.mnt)
+				set_fs_pwd(fs, new_nd->path.mnt,
+					   new_nd->path.dentry);
 			put_fs_struct(fs);
 		} else
 			task_unlock(p);
@@ -1752,7 +1759,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	if (error)
 		goto out0;
 	error = -EINVAL;
-	if (!check_mnt(new_nd.mnt))
+	if (!check_mnt(new_nd.path.mnt))
 		goto out1;
 
 	error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
@@ -1766,55 +1773,59 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	}
 
 	read_lock(&current->fs->lock);
-	user_nd.mnt = mntget(current->fs->rootmnt);
-	user_nd.dentry = dget(current->fs->root);
+	user_nd.path.mnt = mntget(current->fs->rootmnt);
+	user_nd.path.dentry = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
 	down_write(&namespace_sem);
-	mutex_lock(&old_nd.dentry->d_inode->i_mutex);
+	mutex_lock(&old_nd.path.dentry->d_inode->i_mutex);
 	error = -EINVAL;
-	if (IS_MNT_SHARED(old_nd.mnt) ||
-		IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
-		IS_MNT_SHARED(user_nd.mnt->mnt_parent))
+	if (IS_MNT_SHARED(old_nd.path.mnt) ||
+		IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) ||
+		IS_MNT_SHARED(user_nd.path.mnt->mnt_parent))
 		goto out2;
-	if (!check_mnt(user_nd.mnt))
+	if (!check_mnt(user_nd.path.mnt))
 		goto out2;
 	error = -ENOENT;
-	if (IS_DEADDIR(new_nd.dentry->d_inode))
+	if (IS_DEADDIR(new_nd.path.dentry->d_inode))
 		goto out2;
-	if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
+	if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry))
 		goto out2;
-	if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
+	if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry))
 		goto out2;
 	error = -EBUSY;
-	if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt)
+	if (new_nd.path.mnt == user_nd.path.mnt ||
+	    old_nd.path.mnt == user_nd.path.mnt)
 		goto out2; /* loop, on the same file system  */
 	error = -EINVAL;
-	if (user_nd.mnt->mnt_root != user_nd.dentry)
+	if (user_nd.path.mnt->mnt_root != user_nd.path.dentry)
 		goto out2; /* not a mountpoint */
-	if (user_nd.mnt->mnt_parent == user_nd.mnt)
+	if (user_nd.path.mnt->mnt_parent == user_nd.path.mnt)
 		goto out2; /* not attached */
-	if (new_nd.mnt->mnt_root != new_nd.dentry)
+	if (new_nd.path.mnt->mnt_root != new_nd.path.dentry)
 		goto out2; /* not a mountpoint */
-	if (new_nd.mnt->mnt_parent == new_nd.mnt)
+	if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt)
 		goto out2; /* not attached */
-	tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
+	/* make sure we can reach put_old from new_root */
+	tmp = old_nd.path.mnt;
 	spin_lock(&vfsmount_lock);
-	if (tmp != new_nd.mnt) {
+	if (tmp != new_nd.path.mnt) {
 		for (;;) {
 			if (tmp->mnt_parent == tmp)
 				goto out3; /* already mounted on put_old */
-			if (tmp->mnt_parent == new_nd.mnt)
+			if (tmp->mnt_parent == new_nd.path.mnt)
 				break;
 			tmp = tmp->mnt_parent;
 		}
-		if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
+		if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry))
 			goto out3;
-	} else if (!is_subdir(old_nd.dentry, new_nd.dentry))
+	} else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry))
 		goto out3;
-	detach_mnt(new_nd.mnt, &parent_nd);
-	detach_mnt(user_nd.mnt, &root_parent);
-	attach_mnt(user_nd.mnt, &old_nd);     /* mount old root on put_old */
-	attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
+	detach_mnt(new_nd.path.mnt, &parent_nd);
+	detach_mnt(user_nd.path.mnt, &root_parent);
+	/* mount old root on put_old */
+	attach_mnt(user_nd.path.mnt, &old_nd);
+	/* mount new_root on / */
+	attach_mnt(new_nd.path.mnt, &root_parent);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	spin_unlock(&vfsmount_lock);
 	chroot_fs_refs(&user_nd, &new_nd);
@@ -1823,7 +1834,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	path_release(&root_parent);
 	path_release(&parent_nd);
 out2:
-	mutex_unlock(&old_nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex);
 	up_write(&namespace_sem);
 	path_release(&user_nd);
 	path_release(&old_nd);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index be4ce1c3a3d8..3b6d83dc98a7 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -107,38 +107,40 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 
 	BUG_ON(IS_ROOT(dentry));
 	dprintk("%s: enter\n", __FUNCTION__);
-	dput(nd->dentry);
-	nd->dentry = dget(dentry);
+	dput(nd->path.dentry);
+	nd->path.dentry = dget(dentry);
 
 	/* Look it up again */
-	parent = dget_parent(nd->dentry);
+	parent = dget_parent(nd->path.dentry);
 	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
-						  &nd->dentry->d_name,
+						  &nd->path.dentry->d_name,
 						  &fh, &fattr);
 	dput(parent);
 	if (err != 0)
 		goto out_err;
 
 	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL)
-		mnt = nfs_do_refmount(nd->mnt, nd->dentry);
+		mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
 	else
-		mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr);
+		mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh,
+				      &fattr);
 	err = PTR_ERR(mnt);
 	if (IS_ERR(mnt))
 		goto out_err;
 
 	mntget(mnt);
-	err = do_add_mount(mnt, nd, nd->mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list);
+	err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
+			   &nfs_automount_list);
 	if (err < 0) {
 		mntput(mnt);
 		if (err == -EBUSY)
 			goto out_follow;
 		goto out_err;
 	}
-	mntput(nd->mnt);
-	dput(nd->dentry);
-	nd->mnt = mnt;
-	nd->dentry = dget(mnt->mnt_root);
+	mntput(nd->path.mnt);
+	dput(nd->path.dentry);
+	nd->path.mnt = mnt;
+	nd->path.dentry = dget(mnt->mnt_root);
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
 out:
 	dprintk("%s: done, returned %d\n", __FUNCTION__, err);
@@ -149,7 +151,8 @@ out_err:
 	path_release(nd);
 	goto out;
 out_follow:
-	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
+	while (d_mountpoint(nd->path.dentry) &&
+	       follow_down(&nd->path.mnt, &nd->path.dentry))
 		;
 	err = 0;
 	goto out;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 027e1095256e..7ce07862c2fb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1384,11 +1384,11 @@ out_close:
 struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *parent;
 	struct path path = {
-		.mnt = nd->mnt,
+		.mnt = nd->path.mnt,
 		.dentry = dentry,
 	};
+	struct dentry *parent;
 	struct iattr attr;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -1433,7 +1433,7 @@ int
 nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
 	struct path path = {
-		.mnt = nd->mnt,
+		.mnt = nd->path.mnt,
 		.dentry = dentry,
 	};
 	struct rpc_cred *cred;
@@ -1885,7 +1885,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                  int flags, struct nameidata *nd)
 {
 	struct path path = {
-		.mnt = nd->mnt,
+		.mnt = nd->path.mnt,
 		.dentry = dentry,
 	};
 	struct nfs4_state *state;
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 51f1b31acbf6..49ef0b4d4439 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -41,7 +41,7 @@ static struct file *do_open(char *name, int flags)
 		error = may_open(&nd, MAY_WRITE, FMODE_WRITE);
 
 	if (!error)
-		return dentry_open(nd.dentry, nd.mnt, flags);
+		return dentry_open(nd.path.dentry, nd.path.mnt, flags);
 
 	path_release(&nd);
 	return ERR_PTR(error);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 346570f6d848..2ac0e30285c2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -169,8 +169,8 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 			goto out;
 
 		dprintk("Found the path %s\n", buf);
-		key.ek_mnt = nd.mnt;
-		key.ek_dentry = nd.dentry;
+		key.ek_mnt = nd.path.mnt;
+		key.ek_dentry = nd.path.dentry;
 		
 		ek = svc_expkey_update(&key, ek);
 		if (ek)
@@ -507,7 +507,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 	struct svc_export exp, *expp;
 	int an_int;
 
-	nd.dentry = NULL;
+	nd.path.dentry = NULL;
 	exp.ex_path = NULL;
 
 	/* fs locations */
@@ -547,8 +547,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 
 	exp.h.flags = 0;
 	exp.ex_client = dom;
-	exp.ex_mnt = nd.mnt;
-	exp.ex_dentry = nd.dentry;
+	exp.ex_mnt = nd.path.mnt;
+	exp.ex_dentry = nd.path.dentry;
 	exp.ex_path = kstrdup(buf, GFP_KERNEL);
 	err = -ENOMEM;
 	if (!exp.ex_path)
@@ -610,7 +610,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 				goto out;
 		}
 
-		err = check_export(nd.dentry->d_inode, exp.ex_flags,
+		err = check_export(nd.path.dentry->d_inode, exp.ex_flags,
 				   exp.ex_uuid);
 		if (err) goto out;
 	}
@@ -629,7 +629,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 	nfsd4_fslocs_free(&exp.ex_fslocs);
 	kfree(exp.ex_uuid);
  	kfree(exp.ex_path);
-	if (nd.dentry)
+	if (nd.path.dentry)
 		path_release(&nd);
  out_no_path:
 	if (dom)
@@ -1030,7 +1030,7 @@ exp_export(struct nfsctl_export *nxp)
 		goto out_unlock;
 	err = -EINVAL;
 
-	exp = exp_get_by_name(clp, nd.mnt, nd.dentry, NULL);
+	exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL);
 
 	memset(&new, 0, sizeof(new));
 
@@ -1038,7 +1038,8 @@ exp_export(struct nfsctl_export *nxp)
 	if ((nxp->ex_flags & NFSEXP_FSID) &&
 	    (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) &&
 	    fsid_key->ek_mnt &&
-	    (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) )
+	    (fsid_key->ek_mnt != nd.path.mnt ||
+	     fsid_key->ek_dentry != nd.path.dentry))
 		goto finish;
 
 	if (!IS_ERR(exp)) {
@@ -1054,7 +1055,7 @@ exp_export(struct nfsctl_export *nxp)
 		goto finish;
 	}
 
-	err = check_export(nd.dentry->d_inode, nxp->ex_flags, NULL);
+	err = check_export(nd.path.dentry->d_inode, nxp->ex_flags, NULL);
 	if (err) goto finish;
 
 	err = -ENOMEM;
@@ -1067,8 +1068,8 @@ exp_export(struct nfsctl_export *nxp)
 	if (!new.ex_path)
 		goto finish;
 	new.ex_client = clp;
-	new.ex_mnt = nd.mnt;
-	new.ex_dentry = nd.dentry;
+	new.ex_mnt = nd.path.mnt;
+	new.ex_dentry = nd.path.dentry;
 	new.ex_flags = nxp->ex_flags;
 	new.ex_anon_uid = nxp->ex_anon_uid;
 	new.ex_anon_gid = nxp->ex_anon_gid;
@@ -1148,7 +1149,7 @@ exp_unexport(struct nfsctl_export *nxp)
 		goto out_domain;
 
 	err = -EINVAL;
-	exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
+	exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL);
 	path_release(&nd);
 	if (IS_ERR(exp))
 		goto out_domain;
@@ -1185,12 +1186,12 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
 		printk("nfsd: exp_rootfh path not found %s", path);
 		return err;
 	}
-	inode = nd.dentry->d_inode;
+	inode = nd.path.dentry->d_inode;
 
 	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
-		 path, nd.dentry, clp->name,
+		 path, nd.path.dentry, clp->name,
 		 inode->i_sb->s_id, inode->i_ino);
-	exp = exp_parent(clp, nd.mnt, nd.dentry, NULL);
+	exp = exp_parent(clp, nd.path.mnt, nd.path.dentry, NULL);
 	if (IS_ERR(exp)) {
 		err = PTR_ERR(exp);
 		goto out;
@@ -1200,7 +1201,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
 	 * fh must be initialized before calling fh_compose
 	 */
 	fh_init(&fh, maxsize);
-	if (fh_compose(&fh, exp, nd.dentry, NULL))
+	if (fh_compose(&fh, exp, nd.path.dentry, NULL))
 		err = -EINVAL;
 	else
 		err = 0;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 1602cd00dd45..a7a8fdf86ea7 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -120,9 +120,9 @@ out_no_tfm:
 static void
 nfsd4_sync_rec_dir(void)
 {
-	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
-	nfsd_sync_dir(rec_dir.dentry);
-	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex);
+	nfsd_sync_dir(rec_dir.path.dentry);
+	mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex);
 }
 
 int
@@ -142,9 +142,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	nfs4_save_user(&uid, &gid);
 
 	/* lock the parent */
-	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex);
 
-	dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
+	dentry = lookup_one_len(dname, rec_dir.path.dentry, HEXDIR_LEN-1);
 	if (IS_ERR(dentry)) {
 		status = PTR_ERR(dentry);
 		goto out_unlock;
@@ -154,11 +154,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 		dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
 		goto out_put;
 	}
-	status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
+	status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU);
 out_put:
 	dput(dentry);
 out_unlock:
-	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex);
 	if (status == 0) {
 		clp->cl_firststate = 1;
 		nfsd4_sync_rec_dir();
@@ -221,7 +221,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
 
 	nfs4_save_user(&uid, &gid);
 
-	filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
+	filp = dentry_open(dget(dir), mntget(rec_dir.path.mnt), O_RDONLY);
 	status = PTR_ERR(filp);
 	if (IS_ERR(filp))
 		goto out;
@@ -286,9 +286,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
 
 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
-	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
-	dentry = lookup_one_len(name, rec_dir.dentry, namlen);
-	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex);
+	dentry = lookup_one_len(name, rec_dir.path.dentry, namlen);
+	mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex);
 	if (IS_ERR(dentry)) {
 		status = PTR_ERR(dentry);
 		return status;
@@ -297,7 +297,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
 	if (!dentry->d_inode)
 		goto out;
 
-	status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
+	status = nfsd4_clear_clid_dir(rec_dir.path.dentry, dentry);
 out:
 	dput(dentry);
 	return status;
@@ -347,12 +347,12 @@ nfsd4_recdir_purge_old(void) {
 
 	if (!rec_dir_init)
 		return;
-	status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
+	status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old);
 	if (status == 0)
 		nfsd4_sync_rec_dir();
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
-			" directory %s\n", rec_dir.dentry->d_name.name);
+			" directory %s\n", rec_dir.path.dentry->d_name.name);
 	return;
 }
 
@@ -373,10 +373,10 @@ int
 nfsd4_recdir_load(void) {
 	int status;
 
-	status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
+	status = nfsd4_list_rec_dir(rec_dir.path.dentry, load_recdir);
 	if (status)
 		printk("nfsd4: failed loading clients from recovery"
-			" directory %s\n", rec_dir.dentry->d_name.name);
+			" directory %s\n", rec_dir.path.dentry->d_name.name);
 	return status;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f6744bc03dae..be2b9ecd230a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3261,7 +3261,7 @@ nfs4_reset_recoverydir(char *recdir)
 	if (status)
 		return status;
 	status = -ENOTDIR;
-	if (S_ISDIR(nd.dentry->d_inode->i_mode)) {
+	if (S_ISDIR(nd.path.dentry->d_inode->i_mode)) {
 		nfs4_set_recdir(recdir);
 		status = 0;
 	}
diff --git a/fs/open.c b/fs/open.c
index 43fcd6031969..279aacf25600 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -127,7 +127,7 @@ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.dentry, &tmp);
+		error = vfs_statfs_native(nd.path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -146,7 +146,7 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.dentry, &tmp);
+		error = vfs_statfs64(nd.path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -233,7 +233,7 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 	error = user_path_walk(path, &nd);
 	if (error)
 		goto out;
-	inode = nd.dentry->d_inode;
+	inode = nd.path.dentry->d_inode;
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
 	error = -EISDIR;
@@ -271,7 +271,7 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error) {
 		DQUOT_INIT(inode);
-		error = do_truncate(nd.dentry, length, 0, NULL);
+		error = do_truncate(nd.path.dentry, length, 0, NULL);
 	}
 
 put_write_and_out:
@@ -455,10 +455,10 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 	res = vfs_permission(&nd, mode);
 	/* SuS v2 requires we report a read only fs too */
 	if(res || !(mode & S_IWOTH) ||
-	   special_file(nd.dentry->d_inode->i_mode))
+	   special_file(nd.path.dentry->d_inode->i_mode))
 		goto out_path_release;
 
-	if(IS_RDONLY(nd.dentry->d_inode))
+	if(IS_RDONLY(nd.path.dentry->d_inode))
 		res = -EROFS;
 
 out_path_release:
@@ -490,7 +490,7 @@ asmlinkage long sys_chdir(const char __user * filename)
 	if (error)
 		goto dput_and_out;
 
-	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
+	set_fs_pwd(current->fs, nd.path.mnt, nd.path.dentry);
 
 dput_and_out:
 	path_release(&nd);
@@ -545,7 +545,7 @@ asmlinkage long sys_chroot(const char __user * filename)
 	if (!capable(CAP_SYS_CHROOT))
 		goto dput_and_out;
 
-	set_fs_root(current->fs, nd.mnt, nd.dentry);
+	set_fs_root(current->fs, nd.path.mnt, nd.path.dentry);
 	set_fs_altroot();
 	error = 0;
 dput_and_out:
@@ -602,7 +602,7 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
 	if (error)
 		goto out;
-	inode = nd.dentry->d_inode;
+	inode = nd.path.dentry->d_inode;
 
 	error = -EROFS;
 	if (IS_RDONLY(inode))
@@ -617,7 +617,7 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 		mode = inode->i_mode;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(nd.dentry, &newattrs);
+	error = notify_change(nd.path.dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
 
 dput_and_out:
@@ -675,7 +675,7 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
 	error = user_path_walk(filename, &nd);
 	if (error)
 		goto out;
-	error = chown_common(nd.dentry, user, group);
+	error = chown_common(nd.path.dentry, user, group);
 	path_release(&nd);
 out:
 	return error;
@@ -695,7 +695,7 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 	error = __user_walk_fd(dfd, filename, follow, &nd);
 	if (error)
 		goto out;
-	error = chown_common(nd.dentry, user, group);
+	error = chown_common(nd.path.dentry, user, group);
 	path_release(&nd);
 out:
 	return error;
@@ -709,7 +709,7 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group
 	error = user_path_walk_link(filename, &nd);
 	if (error)
 		goto out;
-	error = chown_common(nd.dentry, user, group);
+	error = chown_common(nd.path.dentry, user, group);
 	path_release(&nd);
 out:
 	return error;
@@ -863,7 +863,7 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
 		goto out;
 	if (IS_ERR(dentry))
 		goto out_err;
-	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
+	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
 					     nd->intent.open.flags - 1,
 					     nd->intent.open.file,
 					     open);
@@ -891,7 +891,8 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
 	filp = nd->intent.open.file;
 	/* Has the filesystem initialised the file for us? */
 	if (filp->f_path.dentry == NULL)
-		filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
+		filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
+				     NULL);
 	else
 		path_release(nd);
 	return filp;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7c6b4ec83cb7..0ef52230f8c7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1170,7 +1170,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
+	error = PROC_I(inode)->op.proc_get_link(inode, &nd->path.dentry,
+						&nd->path.mnt);
 	nd->last_type = LAST_BIND;
 out:
 	return ERR_PTR(error);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b9cb23c08f63..614c34b6d1c2 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -407,7 +407,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
 	if (!nd || !depth)
 		goto out;
 
-	dentry = nd->dentry;
+	dentry = nd->path.dentry;
 	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
 
 	/* If the entry does not exist deny permission */
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6033f0c3bd0b..2d1d6ac0c3f7 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2026,12 +2026,12 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 	if (err)
 		return err;
 	/* Quotafile not on the same filesystem? */
-	if (nd.mnt->mnt_sb != sb) {
+	if (nd.path.mnt->mnt_sb != sb) {
 		path_release(&nd);
 		return -EXDEV;
 	}
 	/* We must not pack tails for quota files on reiserfs for quota IO to work */
-	if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) {
+	if (!REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask) {
 		reiserfs_warning(sb,
 				 "reiserfs: Quota file must have tail packing disabled.");
 		path_release(&nd);
@@ -2044,7 +2044,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		return vfs_quota_on(sb, type, format_id, path);
 	}
 	/* Quotafile not of fs root? */
-	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
+	if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
 		reiserfs_warning(sb,
 				 "reiserfs: Quota file not on filesystem root. "
 				 "Journalled quota will not work.");
diff --git a/fs/stat.c b/fs/stat.c
index 68510068a641..82680f2c01d2 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -62,7 +62,7 @@ int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
 
 	error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
 	if (!error) {
-		error = vfs_getattr(nd.mnt, nd.dentry, stat);
+		error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
 		path_release(&nd);
 	}
 	return error;
@@ -82,7 +82,7 @@ int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
 
 	error = __user_walk_fd(dfd, name, 0, &nd);
 	if (!error) {
-		error = vfs_getattr(nd.mnt, nd.dentry, stat);
+		error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
 		path_release(&nd);
 	}
 	return error;
@@ -302,14 +302,15 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *path,
 
 	error = __user_walk_fd(dfd, path, 0, &nd);
 	if (!error) {
-		struct inode * inode = nd.dentry->d_inode;
+		struct inode *inode = nd.path.dentry->d_inode;
 
 		error = -EINVAL;
 		if (inode->i_op && inode->i_op->readlink) {
-			error = security_inode_readlink(nd.dentry);
+			error = security_inode_readlink(nd.path.dentry);
 			if (!error) {
-				touch_atime(nd.mnt, nd.dentry);
-				error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+				touch_atime(nd.path.mnt, nd.path.dentry);
+				error = inode->i_op->readlink(nd.path.dentry,
+							      buf, bufsiz);
 			}
 		}
 		path_release(&nd);
diff --git a/fs/utimes.c b/fs/utimes.c
index e5588cd8530e..679b08288a66 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -84,7 +84,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
 		if (error)
 			goto out;
 
-		dentry = nd.dentry;
+		dentry = nd.path.dentry;
 	}
 
 	inode = dentry->d_inode;
diff --git a/fs/xattr.c b/fs/xattr.c
index f7c8f87bb390..be0ee756c5f1 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -262,7 +262,7 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
 	error = user_path_walk(path, &nd);
 	if (error)
 		return error;
-	error = setxattr(nd.dentry, name, value, size, flags);
+	error = setxattr(nd.path.dentry, name, value, size, flags);
 	path_release(&nd);
 	return error;
 }
@@ -277,7 +277,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
 	error = user_path_walk_link(path, &nd);
 	if (error)
 		return error;
-	error = setxattr(nd.dentry, name, value, size, flags);
+	error = setxattr(nd.path.dentry, name, value, size, flags);
 	path_release(&nd);
 	return error;
 }
@@ -347,7 +347,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value,
 	error = user_path_walk(path, &nd);
 	if (error)
 		return error;
-	error = getxattr(nd.dentry, name, value, size);
+	error = getxattr(nd.path.dentry, name, value, size);
 	path_release(&nd);
 	return error;
 }
@@ -362,7 +362,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value,
 	error = user_path_walk_link(path, &nd);
 	if (error)
 		return error;
-	error = getxattr(nd.dentry, name, value, size);
+	error = getxattr(nd.path.dentry, name, value, size);
 	path_release(&nd);
 	return error;
 }
@@ -421,7 +421,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size)
 	error = user_path_walk(path, &nd);
 	if (error)
 		return error;
-	error = listxattr(nd.dentry, list, size);
+	error = listxattr(nd.path.dentry, list, size);
 	path_release(&nd);
 	return error;
 }
@@ -435,7 +435,7 @@ sys_llistxattr(char __user *path, char __user *list, size_t size)
 	error = user_path_walk_link(path, &nd);
 	if (error)
 		return error;
-	error = listxattr(nd.dentry, list, size);
+	error = listxattr(nd.path.dentry, list, size);
 	path_release(&nd);
 	return error;
 }
@@ -482,7 +482,7 @@ sys_removexattr(char __user *path, char __user *name)
 	error = user_path_walk(path, &nd);
 	if (error)
 		return error;
-	error = removexattr(nd.dentry, name);
+	error = removexattr(nd.path.dentry, name);
 	path_release(&nd);
 	return error;
 }
@@ -496,7 +496,7 @@ sys_lremovexattr(char __user *path, char __user *name)
 	error = user_path_walk_link(path, &nd);
 	if (error)
 		return error;
-	error = removexattr(nd.dentry, name);
+	error = removexattr(nd.path.dentry, name);
 	path_release(&nd);
 	return error;
 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4c82a050a3a8..f052a108bcc1 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -91,9 +91,9 @@ xfs_find_handle(
 		if (error)
 			return error;
 
-		ASSERT(nd.dentry);
-		ASSERT(nd.dentry->d_inode);
-		inode = igrab(nd.dentry->d_inode);
+		ASSERT(nd.path.dentry);
+		ASSERT(nd.path.dentry->d_inode);
+		inode = igrab(nd.path.dentry->d_inode);
 		path_release(&nd);
 		break;
 	}
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 1cd15dad2469..52fa2f78bb71 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -16,8 +16,7 @@ struct open_intent {
 enum { MAX_NESTED_LINKS = 8 };
 
 struct nameidata {
-	struct dentry	*dentry;
-	struct vfsmount *mnt;
+	struct path	path;
 	struct qstr	last;
 	unsigned int	flags;
 	int		last_type;
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f4fcf58f20f8..b898814fe4a0 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -549,7 +549,7 @@ void audit_trim_trees(void)
 		if (err)
 			goto skip_it;
 
-		root_mnt = collect_mounts(nd.mnt, nd.dentry);
+		root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
 		path_release(&nd);
 		if (!root_mnt)
 			goto skip_it;
@@ -583,17 +583,17 @@ skip_it:
 static int is_under(struct vfsmount *mnt, struct dentry *dentry,
 		    struct nameidata *nd)
 {
-	if (mnt != nd->mnt) {
+	if (mnt != nd->path.mnt) {
 		for (;;) {
 			if (mnt->mnt_parent == mnt)
 				return 0;
-			if (mnt->mnt_parent == nd->mnt)
+			if (mnt->mnt_parent == nd->path.mnt)
 					break;
 			mnt = mnt->mnt_parent;
 		}
 		dentry = mnt->mnt_mountpoint;
 	}
-	return is_subdir(dentry, nd->dentry);
+	return is_subdir(dentry, nd->path.dentry);
 }
 
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
@@ -641,7 +641,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	err = path_lookup(tree->pathname, 0, &nd);
 	if (err)
 		goto Err;
-	mnt = collect_mounts(nd.mnt, nd.dentry);
+	mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
 	path_release(&nd);
 	if (!mnt) {
 		err = -ENOMEM;
@@ -701,7 +701,7 @@ int audit_tag_tree(char *old, char *new)
 	err = path_lookup(new, 0, &nd);
 	if (err)
 		return err;
-	tagged = collect_mounts(nd.mnt, nd.dentry);
+	tagged = collect_mounts(nd.path.mnt, nd.path.dentry);
 	path_release(&nd);
 	if (!tagged)
 		return -ENOMEM;
@@ -711,8 +711,8 @@ int audit_tag_tree(char *old, char *new)
 		drop_collected_mounts(tagged);
 		return err;
 	}
-	mnt = mntget(nd.mnt);
-	dentry = dget(nd.dentry);
+	mnt = mntget(nd.path.mnt);
+	dentry = dget(nd.path.dentry);
 	path_release(&nd);
 
 	if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6f19fd477aac..a36e66797c3d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -169,8 +169,8 @@ static struct audit_parent *audit_init_parent(struct nameidata *ndp)
 	inotify_init_watch(&parent->wdata);
 	/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
 	get_inotify_watch(&parent->wdata);
-	wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode,
-			       AUDIT_IN_WATCH);
+	wd = inotify_add_watch(audit_ih, &parent->wdata,
+			       ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
 	if (wd < 0) {
 		audit_free_parent(&parent->wdata);
 		return ERR_PTR(wd);
@@ -1214,8 +1214,8 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
 
 	/* update watch filter fields */
 	if (ndw) {
-		watch->dev = ndw->dentry->d_inode->i_sb->s_dev;
-		watch->ino = ndw->dentry->d_inode->i_ino;
+		watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
+		watch->ino = ndw->path.dentry->d_inode->i_ino;
 	}
 
 	/* The audit_filter_mutex must not be held during inotify calls because
@@ -1225,7 +1225,8 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
 	 */
 	mutex_unlock(&audit_filter_mutex);
 
-	if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) {
+	if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
+			       &i_watch) < 0) {
 		parent = audit_init_parent(ndp);
 		if (IS_ERR(parent)) {
 			/* caller expects mutex locked */
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0e3ead7e11b9..6bc3babf6175 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -668,7 +668,8 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
 
 	if ((error = rpc_lookup_parent(path, nd)) != 0)
 		return ERR_PTR(error);
-	dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1);
+	dentry = rpc_lookup_create(nd->path.dentry, nd->last.name, nd->last.len,
+				   1);
 	if (IS_ERR(dentry))
 		rpc_release_path(nd);
 	return dentry;
@@ -695,7 +696,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
 	dentry = rpc_lookup_negative(path, &nd);
 	if (IS_ERR(dentry))
 		return dentry;
-	dir = nd.dentry->d_inode;
+	dir = nd.path.dentry->d_inode;
 	if ((error = __rpc_mkdir(dir, dentry)) != 0)
 		goto err_dput;
 	RPC_I(dentry->d_inode)->private = rpc_client;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eea75888805e..7c3323e8827b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -718,14 +718,14 @@ static struct sock *unix_find_other(struct net *net,
 			goto put_fail;
 
 		err = -ECONNREFUSED;
-		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
+		if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
 			goto put_fail;
-		u=unix_find_socket_byinode(net, nd.dentry->d_inode);
+		u = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
 		if (!u)
 			goto put_fail;
 
 		if (u->sk_type == type)
-			touch_atime(nd.mnt, nd.dentry);
+			touch_atime(nd.path.mnt, nd.path.dentry);
 
 		path_release(&nd);
 
@@ -819,12 +819,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		 */
 		mode = S_IFSOCK |
 		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
-		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
+		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
 		if (err)
 			goto out_mknod_dput;
-		mutex_unlock(&nd.dentry->d_inode->i_mutex);
-		dput(nd.dentry);
-		nd.dentry = dentry;
+		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+		dput(nd.path.dentry);
+		nd.path.dentry = dentry;
 
 		addr->hash = UNIX_HASH_SIZE;
 	}
@@ -842,8 +842,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		list = &unix_socket_table[addr->hash];
 	} else {
 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
-		u->dentry = nd.dentry;
-		u->mnt    = nd.mnt;
+		u->dentry = nd.path.dentry;
+		u->mnt    = nd.path.mnt;
 	}
 
 	err = 0;
@@ -861,7 +861,7 @@ out:
 out_mknod_dput:
 	dput(dentry);
 out_mknod_unlock:
-	mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_release(&nd);
 out_mknod_parent:
 	if (err==-EEXIST)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 44f16d9041e3..ffeefa3c2c77 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2341,10 +2341,10 @@ static int selinux_mount(char * dev_name,
 		return rc;
 
 	if (flags & MS_REMOUNT)
-		return superblock_has_perm(current, nd->mnt->mnt_sb,
+		return superblock_has_perm(current, nd->path.mnt->mnt_sb,
 		                           FILESYSTEM__REMOUNT, NULL);
 	else
-		return dentry_has_perm(current, nd->mnt, nd->dentry,
+		return dentry_has_perm(current, nd->path.mnt, nd->path.dentry,
 		                       FILE__MOUNTON);
 }
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 5b690482f8cb..2b5d6f72f678 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -325,7 +325,7 @@ static int smack_sb_statfs(struct dentry *dentry)
 static int smack_sb_mount(char *dev_name, struct nameidata *nd,
 			  char *type, unsigned long flags, void *data)
 {
-	struct superblock_smack *sbp = nd->mnt->mnt_sb->s_security;
+	struct superblock_smack *sbp = nd->path.mnt->mnt_sb->s_security;
 
 	return smk_curacc(sbp->smk_floor, MAY_WRITE);
 }
-- 
cgit 


From 1d957f9bf87da74f420424d16ece005202bbebd3 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:34:35 -0800
Subject: Introduce path_put()

* Add path_put() functions for releasing a reference to the dentry and
  vfsmount of a struct path in the right order

* Switch from path_release(nd) to path_put(&nd->path)

* Rename dput_path() to path_put_conditional()

[akpm@linux-foundation.org: fix cifs]
Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: <linux-fsdevel@vger.kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Steven French <sfrench@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/osf_sys.c                  |  2 +-
 arch/mips/kernel/sysirix.c                   |  6 +--
 arch/parisc/hpux/sys_hpux.c                  |  2 +-
 arch/powerpc/platforms/cell/spufs/syscalls.c |  2 +-
 arch/sparc64/solaris/fs.c                    |  4 +-
 drivers/md/dm-table.c                        |  2 +-
 drivers/mtd/mtdsuper.c                       |  4 +-
 fs/afs/mntpt.c                               |  2 +-
 fs/autofs4/root.c                            |  2 +-
 fs/block_dev.c                               |  2 +-
 fs/cifs/cifs_dfs_ref.c                       |  2 +-
 fs/coda/pioctl.c                             |  4 +-
 fs/compat.c                                  |  4 +-
 fs/configfs/symlink.c                        |  4 +-
 fs/dquot.c                                   |  2 +-
 fs/ecryptfs/main.c                           |  2 +-
 fs/exec.c                                    |  4 +-
 fs/ext3/super.c                              |  4 +-
 fs/ext4/super.c                              |  4 +-
 fs/gfs2/ops_fstype.c                         |  2 +-
 fs/inotify_user.c                            |  4 +-
 fs/namei.c                                   | 56 +++++++++++++++-------------
 fs/namespace.c                               | 20 +++++-----
 fs/nfs/namespace.c                           |  2 +-
 fs/nfsctl.c                                  |  2 +-
 fs/nfsd/export.c                             | 10 ++---
 fs/nfsd/nfs4recover.c                        |  2 +-
 fs/nfsd/nfs4state.c                          |  2 +-
 fs/open.c                                    | 22 +++++------
 fs/proc/base.c                               |  2 +-
 fs/reiserfs/super.c                          |  8 ++--
 fs/stat.c                                    |  6 +--
 fs/utimes.c                                  |  2 +-
 fs/xattr.c                                   | 16 ++++----
 fs/xfs/linux-2.6/xfs_ioctl.c                 |  2 +-
 include/linux/namei.h                        |  1 -
 include/linux/path.h                         |  2 +
 kernel/audit_tree.c                          | 12 +++---
 kernel/auditfilter.c                         |  4 +-
 net/sunrpc/rpc_pipe.c                        |  2 +-
 net/unix/af_unix.c                           |  6 +--
 41 files changed, 125 insertions(+), 118 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index f2bef5e14faa..8c71daf94a59 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -260,7 +260,7 @@ osf_statfs(char __user *path, struct osf_statfs __user *buffer, unsigned long bu
 	retval = user_path_walk(path, &nd);
 	if (!retval) {
 		retval = do_osf_statfs(nd.path.dentry, buffer, bufsiz);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return retval;
 }
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 49d6292ffa05..672fba84b2cc 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -711,7 +711,7 @@ asmlinkage int irix_statfs(const char __user *path,
 	}
 
 dput_and_out:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -1385,7 +1385,7 @@ asmlinkage int irix_statvfs(char __user *fname, struct irix_statvfs __user *buf)
 		error |= __put_user(0, &buf->f_fstr[i]);
 
 dput_and_out:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -1636,7 +1636,7 @@ asmlinkage int irix_statvfs64(char __user *fname, struct irix_statvfs64 __user *
 		error |= __put_user(0, &buf->f_fstr[i]);
 
 dput_and_out:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index d7395af3e846..0c5b9dabb475 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -222,7 +222,7 @@ asmlinkage long hpux_statfs(const char __user *path,
 		error = vfs_statfs_hpux(nd.path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 430404413178..49c87769b1f8 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -73,7 +73,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
 				LOOKUP_OPEN|LOOKUP_CREATE, &nd);
 		if (!ret) {
 			ret = spufs_create(&nd, flags, mode, neighbor);
-			path_release(&nd);
+			path_put(&nd.path);
 		}
 		putname(tmp);
 	}
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c
index 516932e9f70b..7d035f0d3ae1 100644
--- a/arch/sparc64/solaris/fs.c
+++ b/arch/sparc64/solaris/fs.c
@@ -436,7 +436,7 @@ asmlinkage int solaris_statvfs(u32 path, u32 buf)
 	if (!error) {
 		struct inode *inode = nd.path.dentry->d_inode;
 		error = report_statvfs(nd.path.mnt, inode, buf);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
@@ -466,7 +466,7 @@ asmlinkage int solaris_statvfs64(u32 path, u32 buf)
 	if (!error) {
 		struct inode *inode = nd.path.dentry->d_inode;
 		error = report_statvfs64(nd.path.mnt, inode, buf);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	unlock_kernel();
 	return error;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index b611a3c61504..e75b1437b58b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -375,7 +375,7 @@ static int lookup_device(const char *path, dev_t *dev)
 	*dev = inode->i_rdev;
 
  out:
-	path_release(&nd);
+	path_put(&nd.path);
 	return r;
 }
 
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index e376f4517905..28cc6787a800 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -203,7 +203,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
 		goto not_an_MTD_device;
 
 	mtdnr = iminor(nd.path.dentry->d_inode);
-	path_release(&nd);
+	path_put(&nd.path);
 
 	return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super,
 			     mnt);
@@ -214,7 +214,7 @@ not_an_MTD_device:
 		       "MTD: Attempt to mount non-MTD device \"%s\"\n",
 		       dev_name);
 out:
-	path_release(&nd);
+	path_put(&nd.path);
 	return ret;
 
 }
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 4136dfb9ffb8..e13cea220669 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -227,7 +227,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 
 	newmnt = afs_mntpt_do_automount(nd->path.dentry);
 	if (IS_ERR(newmnt)) {
-		path_release(nd);
+		path_put(&nd->path);
 		return (void *)newmnt;
 	}
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a119c863ff37..a54a946a50ae 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -383,7 +383,7 @@ done:
 	return NULL;
 
 out_error:
-	path_release(nd);
+	path_put(&nd->path);
 	return ERR_PTR(status);
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5f4721fdbdb6..67fe72ce6ac7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1409,7 +1409,7 @@ struct block_device *lookup_bdev(const char *path)
 	if (!bdev)
 		goto fail;
 out:
-	path_release(&nd);
+	path_put(&nd.path);
 	return bdev;
 fail:
 	bdev = ERR_PTR(error);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index bcd53c2fe781..6ad447529961 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -368,7 +368,7 @@ out:
 	cFYI(1, ("leaving %s" , __FUNCTION__));
 	return ERR_PTR(rc);
 out_err:
-	path_release(nd);
+	path_put(&nd->path);
 	goto out;
 }
 
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 3b6a1b721b46..c21a1f552a63 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -80,7 +80,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
 	
 	/* return if it is not a Coda inode */
 	if ( target_inode->i_sb != inode->i_sb ) {
-		path_release(&nd);
+		path_put(&nd.path);
 	        return  -EINVAL;
 	}
 
@@ -89,7 +89,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
 
 	error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
 
-	path_release(&nd);
+	path_put(&nd.path);
         return error;
 }
 
diff --git a/fs/compat.c b/fs/compat.c
index a8d62375ada1..43ca0165740c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -244,7 +244,7 @@ asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs
 		error = vfs_statfs(nd.path.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs(buf, &tmp);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
@@ -312,7 +312,7 @@ asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, s
 		error = vfs_statfs(nd.path.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index cda3ea001ae6..78929ea84ff2 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -103,7 +103,7 @@ static int get_target(const char *symname, struct nameidata *nd,
 			*target = configfs_get_config_item(nd->path.dentry);
 			if (!*target) {
 				ret = -ENOENT;
-				path_release(nd);
+				path_put(&nd->path);
 			}
 		} else
 			ret = -EPERM;
@@ -141,7 +141,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 		ret = create_link(parent_item, target_item, dentry);
 
 	config_item_put(target_item);
-	path_release(&nd);
+	path_put(&nd.path);
 
 out_put:
 	config_item_put(parent_item);
diff --git a/fs/dquot.c b/fs/dquot.c
index 289f48d2c727..9c7feb62eed1 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1643,7 +1643,7 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
 		error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
 					   format_id);
 out_path:
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a70555a6472c..d25ac9500a92 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -526,7 +526,7 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
 	rc = 0;
 	goto out;
 out_free:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return rc;
 }
diff --git a/fs/exec.c b/fs/exec.c
index 7a12d2d1ac11..a44b142fb460 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -148,7 +148,7 @@ out:
   	return error;
 exit:
 	release_open_intent(&nd);
-	path_release(&nd);
+	path_put(&nd.path);
 	goto out;
 }
 
@@ -672,7 +672,7 @@ out:
 			}
 		}
 		release_open_intent(&nd);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	goto out;
 }
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0b5057e0dc1e..18769cc32377 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2759,7 +2759,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 		return err;
 	/* Quotafile not on the same filesystem? */
 	if (nd.path.mnt->mnt_sb != sb) {
-		path_release(&nd);
+		path_put(&nd.path);
 		return -EXDEV;
 	}
 	/* Quotafile not of fs root? */
@@ -2767,7 +2767,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 		printk(KERN_WARNING
 			"EXT3-fs: Quota file not on filesystem root. "
 			"Journalled quota will not work.\n");
-	path_release(&nd);
+	path_put(&nd.path);
 	return vfs_quota_on(sb, type, format_id, path);
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 37117990073d..13383ba18f1d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3159,7 +3159,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		return err;
 	/* Quotafile not on the same filesystem? */
 	if (nd.path.mnt->mnt_sb != sb) {
-		path_release(&nd);
+		path_put(&nd.path);
 		return -EXDEV;
 	}
 	/* Quotafile not of fs root? */
@@ -3167,7 +3167,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		printk(KERN_WARNING
 			"EXT4-fs: Quota file not on filesystem root. "
 			"Journalled quota will not work.\n");
-	path_release(&nd);
+	path_put(&nd.path);
 	return vfs_quota_on(sb, type, format_id, path);
 }
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f4ced7fcda82..4bee6aa845e4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -900,7 +900,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
 	       "mount point %s\n", dev_name);
 
 free_nd:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return sb;
 }
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index e9c58652533a..7b94a1e3c015 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -367,7 +367,7 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
 	/* you can only watch an inode if you have read permissions on it */
 	error = vfs_permission(nd, MAY_READ);
 	if (error)
-		path_release(nd);
+		path_put(&nd->path);
 	return error;
 }
 
@@ -676,7 +676,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 		ret = create_watch(dev, inode, mask);
 	mutex_unlock(&dev->up_mutex);
 
-	path_release(&nd);
+	path_put(&nd.path);
 fput_and_out:
 	fput_light(filp, fput_needed);
 	return ret;
diff --git a/fs/namei.c b/fs/namei.c
index c9b05a71c39c..b0df7ea733d7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -362,11 +362,18 @@ int deny_write_access(struct file * file)
 	return 0;
 }
 
-void path_release(struct nameidata *nd)
+/**
+ * path_put - put a reference to a path
+ * @path: path to put the reference to
+ *
+ * Given a path decrement the reference count to the dentry and the vfsmount.
+ */
+void path_put(struct path *path)
 {
-	dput(nd->path.dentry);
-	mntput(nd->path.mnt);
+	dput(path->dentry);
+	mntput(path->mnt);
 }
+EXPORT_SYMBOL(path_put);
 
 /**
  * release_open_intent - free up open intent resources
@@ -551,7 +558,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
 		goto fail;
 
 	if (*link == '/') {
-		path_release(nd);
+		path_put(&nd->path);
 		if (!walk_init_root(link, nd))
 			/* weird __emul_prefix() stuff did it */
 			goto out;
@@ -567,18 +574,18 @@ out:
 	 */
 	name = __getname();
 	if (unlikely(!name)) {
-		path_release(nd);
+		path_put(&nd->path);
 		return -ENOMEM;
 	}
 	strcpy(name, nd->last.name);
 	nd->last.name = name;
 	return 0;
 fail:
-	path_release(nd);
+	path_put(&nd->path);
 	return PTR_ERR(link);
 }
 
-static inline void dput_path(struct path *path, struct nameidata *nd)
+static void path_put_conditional(struct path *path, struct nameidata *nd)
 {
 	dput(path->dentry);
 	if (path->mnt != nd->path.mnt)
@@ -651,8 +658,8 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
 	nd->depth--;
 	return err;
 loop:
-	dput_path(path, nd);
-	path_release(nd);
+	path_put_conditional(path, nd);
+	path_put(&nd->path);
 	return err;
 }
 
@@ -993,10 +1000,10 @@ return_reval:
 return_base:
 		return 0;
 out_dput:
-		dput_path(&next, nd);
+		path_put_conditional(&next, nd);
 		break;
 	}
-	path_release(nd);
+	path_put(&nd->path);
 return_err:
 	return err;
 }
@@ -1070,7 +1077,7 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 				mntput(old_mnt);
 				return 1;
 			}
-			path_release(nd);
+			path_put(&nd->path);
 		}
 		nd->path.dentry = old_dentry;
 		nd->path.mnt = old_mnt;
@@ -1230,7 +1237,7 @@ static int __path_lookup_intent_open(int dfd, const char *name,
 	if (IS_ERR(nd->intent.open.file)) {
 		if (err == 0) {
 			err = PTR_ERR(nd->intent.open.file);
-			path_release(nd);
+			path_put(&nd->path);
 		}
 	} else if (err != 0)
 		release_open_intent(nd);
@@ -1806,11 +1813,11 @@ ok:
 	return 0;
 
 exit_dput:
-	dput_path(&path, nd);
+	path_put_conditional(&path, nd);
 exit:
 	if (!IS_ERR(nd->intent.open.file))
 		release_open_intent(nd);
-	path_release(nd);
+	path_put(&nd->path);
 	return error;
 
 do_link:
@@ -1979,7 +1986,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 		dput(dentry);
 	}
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	putname(tmp);
 
@@ -2039,7 +2046,7 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
 	dput(dentry);
 out_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	putname(tmp);
 out_err:
@@ -2147,7 +2154,7 @@ static long do_rmdir(int dfd, const char __user *pathname)
 exit2:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 exit1:
-	path_release(&nd);
+	path_put(&nd.path);
 exit:
 	putname(name);
 	return error;
@@ -2231,7 +2238,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	if (inode)
 		iput(inode);	/* truncate the inode here */
 exit1:
-	path_release(&nd);
+	path_put(&nd.path);
 exit:
 	putname(name);
 	return error;
@@ -2308,7 +2315,7 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
 	dput(dentry);
 out_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	putname(to);
 out_putname:
@@ -2404,9 +2411,9 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 out_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 out_release:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
-	path_release(&old_nd);
+	path_put(&old_nd.path);
 exit:
 	putname(to);
 
@@ -2634,9 +2641,9 @@ exit4:
 exit3:
 	unlock_rename(new_dir, old_dir);
 exit2:
-	path_release(&newnd);
+	path_put(&newnd.path);
 exit1:
-	path_release(&oldnd);
+	path_put(&oldnd.path);
 exit:
 	return error;
 }
@@ -2810,7 +2817,6 @@ EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
 EXPORT_SYMBOL(path_lookup);
 EXPORT_SYMBOL(vfs_path_lookup);
-EXPORT_SYMBOL(path_release);
 EXPORT_SYMBOL(permission);
 EXPORT_SYMBOL(vfs_permission);
 EXPORT_SYMBOL(file_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 5d9fd4c6d1f5..c77eedd2ac66 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1007,7 +1007,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name,
 
 out:
 	up_write(&namespace_sem);
-	path_release(&old_nd);
+	path_put(&old_nd.path);
 	return err;
 }
 
@@ -1126,8 +1126,8 @@ out1:
 out:
 	up_write(&namespace_sem);
 	if (!err)
-		path_release(&parent_nd);
-	path_release(&old_nd);
+		path_put(&parent_nd.path);
+	path_put(&old_nd.path);
 	return err;
 }
 
@@ -1512,7 +1512,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 		retval = do_new_mount(&nd, type_page, flags, mnt_flags,
 				      dev_name, data_page);
 dput_out:
-	path_release(&nd);
+	path_put(&nd.path);
 	return retval;
 }
 
@@ -1768,7 +1768,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 
 	error = security_sb_pivotroot(&old_nd, &new_nd);
 	if (error) {
-		path_release(&old_nd);
+		path_put(&old_nd.path);
 		goto out1;
 	}
 
@@ -1831,15 +1831,15 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	chroot_fs_refs(&user_nd, &new_nd);
 	security_sb_post_pivotroot(&user_nd, &new_nd);
 	error = 0;
-	path_release(&root_parent);
-	path_release(&parent_nd);
+	path_put(&root_parent.path);
+	path_put(&parent_nd.path);
 out2:
 	mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex);
 	up_write(&namespace_sem);
-	path_release(&user_nd);
-	path_release(&old_nd);
+	path_put(&user_nd.path);
+	path_put(&old_nd.path);
 out1:
-	path_release(&new_nd);
+	path_put(&new_nd.path);
 out0:
 	unlock_kernel();
 	return error;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 3b6d83dc98a7..607f6eb9cdb5 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -148,7 +148,7 @@ out:
 	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
 	return ERR_PTR(err);
 out_err:
-	path_release(nd);
+	path_put(&nd->path);
 	goto out;
 out_follow:
 	while (d_mountpoint(nd->path.dentry) &&
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 49ef0b4d4439..aed8145d9087 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -43,7 +43,7 @@ static struct file *do_open(char *name, int flags)
 	if (!error)
 		return dentry_open(nd.path.dentry, nd.path.mnt, flags);
 
-	path_release(&nd);
+	path_put(&nd.path);
 	return ERR_PTR(error);
 }
 
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 2ac0e30285c2..717413f07e9a 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -177,7 +177,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 			cache_put(&ek->h, &svc_expkey_cache);
 		else
 			err = -ENOMEM;
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	cache_flush();
  out:
@@ -630,7 +630,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 	kfree(exp.ex_uuid);
  	kfree(exp.ex_path);
 	if (nd.path.dentry)
-		path_release(&nd);
+		path_put(&nd.path);
  out_no_path:
 	if (dom)
 		auth_domain_put(dom);
@@ -1098,7 +1098,7 @@ finish:
 		cache_put(&fsid_key->h, &svc_expkey_cache);
 	if (clp)
 		auth_domain_put(clp);
-	path_release(&nd);
+	path_put(&nd.path);
 out_unlock:
 	exp_writeunlock();
 out:
@@ -1150,7 +1150,7 @@ exp_unexport(struct nfsctl_export *nxp)
 
 	err = -EINVAL;
 	exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL);
-	path_release(&nd);
+	path_put(&nd.path);
 	if (IS_ERR(exp))
 		goto out_domain;
 
@@ -1209,7 +1209,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
 	fh_put(&fh);
 	exp_put(exp);
 out:
-	path_release(&nd);
+	path_put(&nd.path);
 	return err;
 }
 
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index a7a8fdf86ea7..1ff90625860f 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -415,5 +415,5 @@ nfsd4_shutdown_recdir(void)
 	if (!rec_dir_init)
 		return;
 	rec_dir_init = 0;
-	path_release(&rec_dir);
+	path_put(&rec_dir.path);
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index be2b9ecd230a..bcb97d8e8b8b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3265,7 +3265,7 @@ nfs4_reset_recoverydir(char *recdir)
 		nfs4_set_recdir(recdir);
 		status = 0;
 	}
-	path_release(&nd);
+	path_put(&nd.path);
 	return status;
 }
 
diff --git a/fs/open.c b/fs/open.c
index 279aacf25600..ca8ac4bbd3bd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -130,7 +130,7 @@ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
 		error = vfs_statfs_native(nd.path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
@@ -149,7 +149,7 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64
 		error = vfs_statfs64(nd.path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
@@ -277,7 +277,7 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 put_write_and_out:
 	put_write_access(inode);
 dput_and_out:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -462,7 +462,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 		res = -EROFS;
 
 out_path_release:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	current->fsuid = old_fsuid;
 	current->fsgid = old_fsgid;
@@ -493,7 +493,7 @@ asmlinkage long sys_chdir(const char __user * filename)
 	set_fs_pwd(current->fs, nd.path.mnt, nd.path.dentry);
 
 dput_and_out:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -549,7 +549,7 @@ asmlinkage long sys_chroot(const char __user * filename)
 	set_fs_altroot();
 	error = 0;
 dput_and_out:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -621,7 +621,7 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 	mutex_unlock(&inode->i_mutex);
 
 dput_and_out:
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -676,7 +676,7 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
 	if (error)
 		goto out;
 	error = chown_common(nd.path.dentry, user, group);
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -696,7 +696,7 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 	if (error)
 		goto out;
 	error = chown_common(nd.path.dentry, user, group);
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -710,7 +710,7 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group
 	if (error)
 		goto out;
 	error = chown_common(nd.path.dentry, user, group);
-	path_release(&nd);
+	path_put(&nd.path);
 out:
 	return error;
 }
@@ -894,7 +894,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
 		filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
 				     NULL);
 	else
-		path_release(nd);
+		path_put(&nd->path);
 	return filp;
 }
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0ef52230f8c7..c742be48348f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1164,7 +1164,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 	int error = -EACCES;
 
 	/* We don't need a base pointer in the /proc filesystem */
-	path_release(nd);
+	path_put(&nd->path);
 
 	/* Are we allowed to snoop on the tasks file descriptors? */
 	if (!proc_fd_access_allowed(inode))
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2d1d6ac0c3f7..6841452e0dea 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2027,20 +2027,20 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		return err;
 	/* Quotafile not on the same filesystem? */
 	if (nd.path.mnt->mnt_sb != sb) {
-		path_release(&nd);
+		path_put(&nd.path);
 		return -EXDEV;
 	}
 	/* We must not pack tails for quota files on reiserfs for quota IO to work */
 	if (!REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask) {
 		reiserfs_warning(sb,
 				 "reiserfs: Quota file must have tail packing disabled.");
-		path_release(&nd);
+		path_put(&nd.path);
 		return -EINVAL;
 	}
 	/* Not journalling quota? No more tests needed... */
 	if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
 	    !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
-		path_release(&nd);
+		path_put(&nd.path);
 		return vfs_quota_on(sb, type, format_id, path);
 	}
 	/* Quotafile not of fs root? */
@@ -2048,7 +2048,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		reiserfs_warning(sb,
 				 "reiserfs: Quota file not on filesystem root. "
 				 "Journalled quota will not work.");
-	path_release(&nd);
+	path_put(&nd.path);
 	return vfs_quota_on(sb, type, format_id, path);
 }
 
diff --git a/fs/stat.c b/fs/stat.c
index 82680f2c01d2..9cf41f719d50 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -63,7 +63,7 @@ int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
 	error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
 	if (!error) {
 		error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
@@ -83,7 +83,7 @@ int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
 	error = __user_walk_fd(dfd, name, 0, &nd);
 	if (!error) {
 		error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
@@ -313,7 +313,7 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *path,
 							      buf, bufsiz);
 			}
 		}
-		path_release(&nd);
+		path_put(&nd.path);
 	}
 	return error;
 }
diff --git a/fs/utimes.c b/fs/utimes.c
index 679b08288a66..b18da9c0b97f 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -138,7 +138,7 @@ dput_and_out:
 	if (f)
 		fput(f);
 	else
-		path_release(&nd);
+		path_put(&nd.path);
 out:
 	return error;
 }
diff --git a/fs/xattr.c b/fs/xattr.c
index be0ee756c5f1..3acab1615460 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -263,7 +263,7 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
 	if (error)
 		return error;
 	error = setxattr(nd.path.dentry, name, value, size, flags);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
@@ -278,7 +278,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
 	if (error)
 		return error;
 	error = setxattr(nd.path.dentry, name, value, size, flags);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
@@ -348,7 +348,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value,
 	if (error)
 		return error;
 	error = getxattr(nd.path.dentry, name, value, size);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
@@ -363,7 +363,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value,
 	if (error)
 		return error;
 	error = getxattr(nd.path.dentry, name, value, size);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
@@ -422,7 +422,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size)
 	if (error)
 		return error;
 	error = listxattr(nd.path.dentry, list, size);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
@@ -436,7 +436,7 @@ sys_llistxattr(char __user *path, char __user *list, size_t size)
 	if (error)
 		return error;
 	error = listxattr(nd.path.dentry, list, size);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
@@ -483,7 +483,7 @@ sys_removexattr(char __user *path, char __user *name)
 	if (error)
 		return error;
 	error = removexattr(nd.path.dentry, name);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
@@ -497,7 +497,7 @@ sys_lremovexattr(char __user *path, char __user *name)
 	if (error)
 		return error;
 	error = removexattr(nd.path.dentry, name);
-	path_release(&nd);
+	path_put(&nd.path);
 	return error;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f052a108bcc1..a9952e490ac9 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -94,7 +94,7 @@ xfs_find_handle(
 		ASSERT(nd.path.dentry);
 		ASSERT(nd.path.dentry->d_inode);
 		inode = igrab(nd.path.dentry->d_inode);
-		path_release(&nd);
+		path_put(&nd.path);
 		break;
 	}
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 52fa2f78bb71..24d88e98a626 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -66,7 +66,6 @@ extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameida
 extern int path_lookup(const char *, unsigned, struct nameidata *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct nameidata *);
-extern void path_release(struct nameidata *);
 
 extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags);
 extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags);
diff --git a/include/linux/path.h b/include/linux/path.h
index cbebdc5c9a60..4d976f959f33 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -9,4 +9,6 @@ struct path {
 	struct dentry *dentry;
 };
 
+extern void path_put(struct path *);
+
 #endif  /* _LINUX_PATH_H */
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index b898814fe4a0..9ef5e0aacc3c 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -550,7 +550,7 @@ void audit_trim_trees(void)
 			goto skip_it;
 
 		root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-		path_release(&nd);
+		path_put(&nd.path);
 		if (!root_mnt)
 			goto skip_it;
 
@@ -642,7 +642,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	if (err)
 		goto Err;
 	mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_release(&nd);
+	path_put(&nd.path);
 	if (!mnt) {
 		err = -ENOMEM;
 		goto Err;
@@ -702,7 +702,7 @@ int audit_tag_tree(char *old, char *new)
 	if (err)
 		return err;
 	tagged = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_release(&nd);
+	path_put(&nd.path);
 	if (!tagged)
 		return -ENOMEM;
 
@@ -713,7 +713,7 @@ int audit_tag_tree(char *old, char *new)
 	}
 	mnt = mntget(nd.path.mnt);
 	dentry = dget(nd.path.dentry);
-	path_release(&nd);
+	path_put(&nd.path);
 
 	if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
 		follow_up(&mnt, &dentry);
@@ -744,13 +744,13 @@ int audit_tag_tree(char *old, char *new)
 		spin_lock(&vfsmount_lock);
 		if (!is_under(mnt, dentry, &nd)) {
 			spin_unlock(&vfsmount_lock);
-			path_release(&nd);
+			path_put(&nd.path);
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
 			continue;
 		}
 		spin_unlock(&vfsmount_lock);
-		path_release(&nd);
+		path_put(&nd.path);
 
 		list_for_each_entry(p, &list, mnt_list) {
 			failed = tag_chunk(p->mnt_root->d_inode, tree);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a36e66797c3d..2f2914b7cc30 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1161,11 +1161,11 @@ static int audit_get_nd(char *path, struct nameidata **ndp,
 static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
 {
 	if (ndp) {
-		path_release(ndp);
+		path_put(&ndp->path);
 		kfree(ndp);
 	}
 	if (ndw) {
-		path_release(ndw);
+		path_put(&ndw->path);
 		kfree(ndw);
 	}
 }
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 6bc3babf6175..1b395a41a8b2 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -495,7 +495,7 @@ rpc_lookup_parent(char *path, struct nameidata *nd)
 static void
 rpc_release_path(struct nameidata *nd)
 {
-	path_release(nd);
+	path_put(&nd->path);
 	rpc_put_mount();
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7c3323e8827b..b8788fd5e3c6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -727,7 +727,7 @@ static struct sock *unix_find_other(struct net *net,
 		if (u->sk_type == type)
 			touch_atime(nd.path.mnt, nd.path.dentry);
 
-		path_release(&nd);
+		path_put(&nd.path);
 
 		err=-EPROTOTYPE;
 		if (u->sk_type != type) {
@@ -748,7 +748,7 @@ static struct sock *unix_find_other(struct net *net,
 	return u;
 
 put_fail:
-	path_release(&nd);
+	path_put(&nd.path);
 fail:
 	*error=err;
 	return NULL;
@@ -862,7 +862,7 @@ out_mknod_dput:
 	dput(dentry);
 out_mknod_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_release(&nd);
+	path_put(&nd.path);
 out_mknod_parent:
 	if (err==-EEXIST)
 		err=-EADDRINUSE;
-- 
cgit 


From 5dd784d04924be5d8bc066aded0ec3274b20e612 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:34:38 -0800
Subject: Introduce path_get()

This introduces the symmetric function to path_put() for getting a reference
to the dentry and vfsmount of a struct path in the right order.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c           | 17 +++++++++++++++--
 include/linux/path.h |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 024993535b6f..a6575ca9f9d7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -362,6 +362,19 @@ int deny_write_access(struct file * file)
 	return 0;
 }
 
+/**
+ * path_get - get a reference to a path
+ * @path: path to get the reference to
+ *
+ * Given a path increment the reference count to the dentry and the vfsmount.
+ */
+void path_get(struct path *path)
+{
+	mntget(path->mnt);
+	dget(path->dentry);
+}
+EXPORT_SYMBOL(path_get);
+
 /**
  * path_put - put a reference to a path
  * @path: path to put the reference to
@@ -1160,8 +1173,8 @@ static int do_path_lookup(int dfd, const char *name,
 		if (retval)
 			goto fput_fail;
 
-		nd->path.mnt = mntget(file->f_path.mnt);
-		nd->path.dentry = dget(dentry);
+		nd->path = file->f_path;
+		path_get(&file->f_path);
 
 		fput_light(file, fput_needed);
 	}
diff --git a/include/linux/path.h b/include/linux/path.h
index 4d976f959f33..915e0c382a51 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -9,6 +9,7 @@ struct path {
 	struct dentry *dentry;
 };
 
+extern void path_get(struct path *);
 extern void path_put(struct path *);
 
 #endif  /* _LINUX_PATH_H */
-- 
cgit 


From 6ac08c39a16f72c2d3e845cb6849a1392fa03e80 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:34:38 -0800
Subject: Use struct path in fs_struct

* Use struct path in fs_struct.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Jan Blunck <jblunck@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c               | 34 +++++++++++++---------------
 fs/namei.c                | 53 +++++++++++++++++++------------------------
 fs/namespace.c            | 57 +++++++++++++++++++++--------------------------
 fs/proc/base.c            |  8 +++----
 include/linux/fs_struct.h |  6 ++---
 init/do_mounts.c          |  6 ++---
 kernel/auditsc.c          |  4 ++--
 kernel/exit.c             | 12 ++++------
 kernel/fork.c             | 18 +++++++--------
 9 files changed, 87 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 44f6cf23b70e..66aaf52199e9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1849,8 +1849,7 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
 				char *buf, int buflen)
 {
 	char *res;
-	struct vfsmount *rootmnt;
-	struct dentry *root;
+	struct path root;
 
 	/*
 	 * We have various synthetic filesystems that never get mounted.  On
@@ -1863,14 +1862,13 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
 		return dentry->d_op->d_dname(dentry, buf, buflen);
 
 	read_lock(&current->fs->lock);
-	rootmnt = mntget(current->fs->rootmnt);
-	root = dget(current->fs->root);
+	root = current->fs->root;
+	path_get(&current->fs->root);
 	read_unlock(&current->fs->lock);
 	spin_lock(&dcache_lock);
-	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
+	res = __d_path(dentry, vfsmnt, root.dentry, root.mnt, buf, buflen);
 	spin_unlock(&dcache_lock);
-	dput(root);
-	mntput(rootmnt);
+	path_put(&root);
 	return res;
 }
 
@@ -1916,28 +1914,28 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
 {
 	int error;
-	struct vfsmount *pwdmnt, *rootmnt;
-	struct dentry *pwd, *root;
+	struct path pwd, root;
 	char *page = (char *) __get_free_page(GFP_USER);
 
 	if (!page)
 		return -ENOMEM;
 
 	read_lock(&current->fs->lock);
-	pwdmnt = mntget(current->fs->pwdmnt);
-	pwd = dget(current->fs->pwd);
-	rootmnt = mntget(current->fs->rootmnt);
-	root = dget(current->fs->root);
+	pwd = current->fs->pwd;
+	path_get(&current->fs->pwd);
+	root = current->fs->root;
+	path_get(&current->fs->root);
 	read_unlock(&current->fs->lock);
 
 	error = -ENOENT;
 	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
-	if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
+	if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) {
 		unsigned long len;
 		char * cwd;
 
-		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
+		cwd = __d_path(pwd.dentry, pwd.mnt, root.dentry, root.mnt,
+			       page, PAGE_SIZE);
 		spin_unlock(&dcache_lock);
 
 		error = PTR_ERR(cwd);
@@ -1955,10 +1953,8 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
 		spin_unlock(&dcache_lock);
 
 out:
-	dput(pwd);
-	mntput(pwdmnt);
-	dput(root);
-	mntput(rootmnt);
+	path_put(&pwd);
+	path_put(&root);
 	free_page((unsigned long) page);
 	return error;
 }
diff --git a/fs/namei.c b/fs/namei.c
index a6575ca9f9d7..941c8e8228c0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -549,16 +549,16 @@ walk_init_root(const char *name, struct nameidata *nd)
 	struct fs_struct *fs = current->fs;
 
 	read_lock(&fs->lock);
-	if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-		nd->path.mnt = mntget(fs->altrootmnt);
-		nd->path.dentry = dget(fs->altroot);
+	if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
+		nd->path = fs->altroot;
+		path_get(&fs->altroot);
 		read_unlock(&fs->lock);
 		if (__emul_lookup_dentry(name,nd))
 			return 0;
 		read_lock(&fs->lock);
 	}
-	nd->path.mnt = mntget(fs->rootmnt);
-	nd->path.dentry = dget(fs->root);
+	nd->path = fs->root;
+	path_get(&fs->root);
 	read_unlock(&fs->lock);
 	return 1;
 }
@@ -755,8 +755,8 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
 		struct dentry *old = nd->path.dentry;
 
                 read_lock(&fs->lock);
-		if (nd->path.dentry == fs->root &&
-		    nd->path.mnt == fs->rootmnt) {
+		if (nd->path.dentry == fs->root.dentry &&
+		    nd->path.mnt == fs->root.mnt) {
                         read_unlock(&fs->lock);
 			break;
 		}
@@ -1078,8 +1078,8 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 		 */
 		nd->last_type = LAST_ROOT;
 		read_lock(&fs->lock);
-		nd->path.mnt = mntget(fs->rootmnt);
-		nd->path.dentry = dget(fs->root);
+		nd->path = fs->root;
+		path_get(&fs->root);
 		read_unlock(&fs->lock);
 		if (path_walk(name, nd) == 0) {
 			if (nd->path.dentry->d_inode) {
@@ -1099,29 +1099,22 @@ void set_fs_altroot(void)
 {
 	char *emul = __emul_prefix();
 	struct nameidata nd;
-	struct vfsmount *mnt = NULL, *oldmnt;
-	struct dentry *dentry = NULL, *olddentry;
+	struct path path = {}, old_path;
 	int err;
 	struct fs_struct *fs = current->fs;
 
 	if (!emul)
 		goto set_it;
 	err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
-	if (!err) {
-		mnt = nd.path.mnt;
-		dentry = nd.path.dentry;
-	}
+	if (!err)
+		path = nd.path;
 set_it:
 	write_lock(&fs->lock);
-	oldmnt = fs->altrootmnt;
-	olddentry = fs->altroot;
-	fs->altrootmnt = mnt;
-	fs->altroot = dentry;
+	old_path = fs->altroot;
+	fs->altroot = path;
 	write_unlock(&fs->lock);
-	if (olddentry) {
-		dput(olddentry);
-		mntput(oldmnt);
-	}
+	if (old_path.dentry)
+		path_put(&old_path);
 }
 
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
@@ -1139,21 +1132,21 @@ static int do_path_lookup(int dfd, const char *name,
 
 	if (*name=='/') {
 		read_lock(&fs->lock);
-		if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-			nd->path.mnt = mntget(fs->altrootmnt);
-			nd->path.dentry = dget(fs->altroot);
+		if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
+			nd->path = fs->altroot;
+			path_get(&fs->altroot);
 			read_unlock(&fs->lock);
 			if (__emul_lookup_dentry(name,nd))
 				goto out; /* found in altroot */
 			read_lock(&fs->lock);
 		}
-		nd->path.mnt = mntget(fs->rootmnt);
-		nd->path.dentry = dget(fs->root);
+		nd->path = fs->root;
+		path_get(&fs->root);
 		read_unlock(&fs->lock);
 	} else if (dfd == AT_FDCWD) {
 		read_lock(&fs->lock);
-		nd->path.mnt = mntget(fs->pwdmnt);
-		nd->path.dentry = dget(fs->pwd);
+		nd->path = fs->pwd;
+		path_get(&fs->pwd);
 		read_unlock(&fs->lock);
 	} else {
 		struct dentry *dentry;
diff --git a/fs/namespace.c b/fs/namespace.c
index c77eedd2ac66..ac19212c9bc3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -593,7 +593,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
 	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
 	 */
 	if (flags & MNT_EXPIRE) {
-		if (mnt == current->fs->rootmnt ||
+		if (mnt == current->fs->root.mnt ||
 		    flags & (MNT_FORCE | MNT_DETACH))
 			return -EINVAL;
 
@@ -628,7 +628,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
 	 * /reboot - static binary that would close all descriptors and
 	 * call reboot(9). Then init(8) could umount root and exec /reboot.
 	 */
-	if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) {
+	if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
 		/*
 		 * Special case for "unmounting" root ...
 		 * we just try to remount it readonly.
@@ -1559,17 +1559,17 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	while (p) {
 		q->mnt_ns = new_ns;
 		if (fs) {
-			if (p == fs->rootmnt) {
+			if (p == fs->root.mnt) {
 				rootmnt = p;
-				fs->rootmnt = mntget(q);
+				fs->root.mnt = mntget(q);
 			}
-			if (p == fs->pwdmnt) {
+			if (p == fs->pwd.mnt) {
 				pwdmnt = p;
-				fs->pwdmnt = mntget(q);
+				fs->pwd.mnt = mntget(q);
 			}
-			if (p == fs->altrootmnt) {
+			if (p == fs->altroot.mnt) {
 				altrootmnt = p;
-				fs->altrootmnt = mntget(q);
+				fs->altroot.mnt = mntget(q);
 			}
 		}
 		p = next_mnt(p, mnt_ns->root);
@@ -1653,18 +1653,15 @@ out1:
 void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
 		 struct dentry *dentry)
 {
-	struct dentry *old_root;
-	struct vfsmount *old_rootmnt;
+	struct path old_root;
+
 	write_lock(&fs->lock);
 	old_root = fs->root;
-	old_rootmnt = fs->rootmnt;
-	fs->rootmnt = mntget(mnt);
-	fs->root = dget(dentry);
+	fs->root.mnt = mntget(mnt);
+	fs->root.dentry = dget(dentry);
 	write_unlock(&fs->lock);
-	if (old_root) {
-		dput(old_root);
-		mntput(old_rootmnt);
-	}
+	if (old_root.dentry)
+		path_put(&old_root);
 }
 
 /*
@@ -1674,20 +1671,16 @@ void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
 void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
 		struct dentry *dentry)
 {
-	struct dentry *old_pwd;
-	struct vfsmount *old_pwdmnt;
+	struct path old_pwd;
 
 	write_lock(&fs->lock);
 	old_pwd = fs->pwd;
-	old_pwdmnt = fs->pwdmnt;
-	fs->pwdmnt = mntget(mnt);
-	fs->pwd = dget(dentry);
+	fs->pwd.mnt = mntget(mnt);
+	fs->pwd.dentry = dget(dentry);
 	write_unlock(&fs->lock);
 
-	if (old_pwd) {
-		dput(old_pwd);
-		mntput(old_pwdmnt);
-	}
+	if (old_pwd.dentry)
+		path_put(&old_pwd);
 }
 
 static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
@@ -1702,12 +1695,12 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
 		if (fs) {
 			atomic_inc(&fs->count);
 			task_unlock(p);
-			if (fs->root == old_nd->path.dentry
-			    && fs->rootmnt == old_nd->path.mnt)
+			if (fs->root.dentry == old_nd->path.dentry
+			    && fs->root.mnt == old_nd->path.mnt)
 				set_fs_root(fs, new_nd->path.mnt,
 					    new_nd->path.dentry);
-			if (fs->pwd == old_nd->path.dentry
-			    && fs->pwdmnt == old_nd->path.mnt)
+			if (fs->pwd.dentry == old_nd->path.dentry
+			    && fs->pwd.mnt == old_nd->path.mnt)
 				set_fs_pwd(fs, new_nd->path.mnt,
 					   new_nd->path.dentry);
 			put_fs_struct(fs);
@@ -1773,8 +1766,8 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	}
 
 	read_lock(&current->fs->lock);
-	user_nd.path.mnt = mntget(current->fs->rootmnt);
-	user_nd.path.dentry = dget(current->fs->root);
+	user_nd.path = current->fs->root;
+	path_get(&current->fs->root);
 	read_unlock(&current->fs->lock);
 	down_write(&namespace_sem);
 	mutex_lock(&old_nd.path.dentry->d_inode->i_mutex);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c742be48348f..080f1f6eda61 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -165,8 +165,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
 	}
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->pwdmnt);
-		*dentry = dget(fs->pwd);
+		*mnt = mntget(fs->pwd.mnt);
+		*dentry = dget(fs->pwd.dentry);
 		read_unlock(&fs->lock);
 		result = 0;
 		put_fs_struct(fs);
@@ -186,8 +186,8 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
 	}
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->rootmnt);
-		*dentry = dget(fs->root);
+		*mnt = mntget(fs->root.mnt);
+		*dentry = dget(fs->root.dentry);
 		read_unlock(&fs->lock);
 		result = 0;
 		put_fs_struct(fs);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 11a36ceddf73..e3e7254412da 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -1,15 +1,13 @@
 #ifndef _LINUX_FS_STRUCT_H
 #define _LINUX_FS_STRUCT_H
 
-struct dentry;
-struct vfsmount;
+#include <linux/path.h>
 
 struct fs_struct {
 	atomic_t count;
 	rwlock_t lock;
 	int umask;
-	struct dentry * root, * pwd, * altroot;
-	struct vfsmount * rootmnt, * pwdmnt, * altrootmnt;
+	struct path root, pwd, altroot;
 };
 
 #define INIT_FS {				\
diff --git a/init/do_mounts.c b/init/do_mounts.c
index f86573126f83..3885e70e7759 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -193,10 +193,10 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data)
 		return err;
 
 	sys_chdir("/root");
-	ROOT_DEV = current->fs->pwdmnt->mnt_sb->s_dev;
+	ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev;
 	printk("VFS: Mounted root (%s filesystem)%s.\n",
-	       current->fs->pwdmnt->mnt_sb->s_type->name,
-	       current->fs->pwdmnt->mnt_sb->s_flags & MS_RDONLY ? 
+	       current->fs->pwd.mnt->mnt_sb->s_type->name,
+	       current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ?
 	       " readonly" : "");
 	return 0;
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1c06ecf38d7b..741291a1de0d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1697,8 +1697,8 @@ void __audit_getname(const char *name)
 	++context->name_count;
 	if (!context->pwd) {
 		read_lock(&current->fs->lock);
-		context->pwd = dget(current->fs->pwd);
-		context->pwdmnt = mntget(current->fs->pwdmnt);
+		context->pwd = dget(current->fs->pwd.dentry);
+		context->pwdmnt = mntget(current->fs->pwd.mnt);
 		read_unlock(&current->fs->lock);
 	}
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 3b893e78ce61..506a957b665a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -512,14 +512,10 @@ static void __put_fs_struct(struct fs_struct *fs)
 {
 	/* No need to hold fs->lock if we are killing it */
 	if (atomic_dec_and_test(&fs->count)) {
-		dput(fs->root);
-		mntput(fs->rootmnt);
-		dput(fs->pwd);
-		mntput(fs->pwdmnt);
-		if (fs->altroot) {
-			dput(fs->altroot);
-			mntput(fs->altrootmnt);
-		}
+		path_put(&fs->root);
+		path_put(&fs->pwd);
+		if (fs->altroot.dentry)
+			path_put(&fs->altroot);
 		kmem_cache_free(fs_cachep, fs);
 	}
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 4363a4eb84e3..dd249c37b3a3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -600,16 +600,16 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 		rwlock_init(&fs->lock);
 		fs->umask = old->umask;
 		read_lock(&old->lock);
-		fs->rootmnt = mntget(old->rootmnt);
-		fs->root = dget(old->root);
-		fs->pwdmnt = mntget(old->pwdmnt);
-		fs->pwd = dget(old->pwd);
-		if (old->altroot) {
-			fs->altrootmnt = mntget(old->altrootmnt);
-			fs->altroot = dget(old->altroot);
+		fs->root = old->root;
+		path_get(&old->root);
+		fs->pwd = old->pwd;
+		path_get(&old->pwd);
+		if (old->altroot.dentry) {
+			fs->altroot = old->altroot;
+			path_get(&old->altroot);
 		} else {
-			fs->altrootmnt = NULL;
-			fs->altroot = NULL;
+			fs->altroot.mnt = NULL;
+			fs->altroot.dentry = NULL;
 		}
 		read_unlock(&old->lock);
 	}
-- 
cgit 


From ac748a09fc873915254ed69fe83f1a95436ee30a Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:34:39 -0800
Subject: Make set_fs_{root,pwd} take a struct path

In nearly all cases the set_fs_{root,pwd}() calls work on a struct
path. Change the function to reflect this and use path_get() here.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c            | 28 ++++++++++++++--------------
 fs/open.c                 | 12 ++++--------
 include/linux/fs_struct.h |  4 ++--
 3 files changed, 20 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index ac19212c9bc3..eef57635ee07 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1650,15 +1650,14 @@ out1:
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
  * It can block. Requires the big lock held.
  */
-void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
-		 struct dentry *dentry)
+void set_fs_root(struct fs_struct *fs, struct path *path)
 {
 	struct path old_root;
 
 	write_lock(&fs->lock);
 	old_root = fs->root;
-	fs->root.mnt = mntget(mnt);
-	fs->root.dentry = dget(dentry);
+	fs->root = *path;
+	path_get(path);
 	write_unlock(&fs->lock);
 	if (old_root.dentry)
 		path_put(&old_root);
@@ -1668,15 +1667,14 @@ void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
  * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
  * It can block. Requires the big lock held.
  */
-void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-		struct dentry *dentry)
+void set_fs_pwd(struct fs_struct *fs, struct path *path)
 {
 	struct path old_pwd;
 
 	write_lock(&fs->lock);
 	old_pwd = fs->pwd;
-	fs->pwd.mnt = mntget(mnt);
-	fs->pwd.dentry = dget(dentry);
+	fs->pwd = *path;
+	path_get(path);
 	write_unlock(&fs->lock);
 
 	if (old_pwd.dentry)
@@ -1697,12 +1695,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
 			task_unlock(p);
 			if (fs->root.dentry == old_nd->path.dentry
 			    && fs->root.mnt == old_nd->path.mnt)
-				set_fs_root(fs, new_nd->path.mnt,
-					    new_nd->path.dentry);
+				set_fs_root(fs, &new_nd->path);
 			if (fs->pwd.dentry == old_nd->path.dentry
 			    && fs->pwd.mnt == old_nd->path.mnt)
-				set_fs_pwd(fs, new_nd->path.mnt,
-					   new_nd->path.dentry);
+				set_fs_pwd(fs, &new_nd->path);
 			put_fs_struct(fs);
 		} else
 			task_unlock(p);
@@ -1845,6 +1841,7 @@ static void __init init_mount_tree(void)
 {
 	struct vfsmount *mnt;
 	struct mnt_namespace *ns;
+	struct path root;
 
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
@@ -1863,8 +1860,11 @@ static void __init init_mount_tree(void)
 	init_task.nsproxy->mnt_ns = ns;
 	get_mnt_ns(ns);
 
-	set_fs_pwd(current->fs, ns->root, ns->root->mnt_root);
-	set_fs_root(current->fs, ns->root, ns->root->mnt_root);
+	root.mnt = ns->root;
+	root.dentry = ns->root->mnt_root;
+
+	set_fs_pwd(current->fs, &root);
+	set_fs_root(current->fs, &root);
 }
 
 void __init mnt_init(void)
diff --git a/fs/open.c b/fs/open.c
index ca8ac4bbd3bd..54198538b67e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -490,7 +490,7 @@ asmlinkage long sys_chdir(const char __user * filename)
 	if (error)
 		goto dput_and_out;
 
-	set_fs_pwd(current->fs, nd.path.mnt, nd.path.dentry);
+	set_fs_pwd(current->fs, &nd.path);
 
 dput_and_out:
 	path_put(&nd.path);
@@ -501,9 +501,7 @@ out:
 asmlinkage long sys_fchdir(unsigned int fd)
 {
 	struct file *file;
-	struct dentry *dentry;
 	struct inode *inode;
-	struct vfsmount *mnt;
 	int error;
 
 	error = -EBADF;
@@ -511,9 +509,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
 	if (!file)
 		goto out;
 
-	dentry = file->f_path.dentry;
-	mnt = file->f_path.mnt;
-	inode = dentry->d_inode;
+	inode = file->f_path.dentry->d_inode;
 
 	error = -ENOTDIR;
 	if (!S_ISDIR(inode->i_mode))
@@ -521,7 +517,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
 
 	error = file_permission(file, MAY_EXEC);
 	if (!error)
-		set_fs_pwd(current->fs, mnt, dentry);
+		set_fs_pwd(current->fs, &file->f_path);
 out_putf:
 	fput(file);
 out:
@@ -545,7 +541,7 @@ asmlinkage long sys_chroot(const char __user * filename)
 	if (!capable(CAP_SYS_CHROOT))
 		goto dput_and_out;
 
-	set_fs_root(current->fs, nd.path.mnt, nd.path.dentry);
+	set_fs_root(current->fs, &nd.path);
 	set_fs_altroot();
 	error = 0;
 dput_and_out:
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index e3e7254412da..282f54219129 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -20,8 +20,8 @@ extern struct kmem_cache *fs_cachep;
 
 extern void exit_fs(struct task_struct *);
 extern void set_fs_altroot(void);
-extern void set_fs_root(struct fs_struct *, struct vfsmount *, struct dentry *);
-extern void set_fs_pwd(struct fs_struct *, struct vfsmount *, struct dentry *);
+extern void set_fs_root(struct fs_struct *, struct path *);
+extern void set_fs_pwd(struct fs_struct *, struct path *);
 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
 extern void put_fs_struct(struct fs_struct *);
 
-- 
cgit 


From 44707fdf5938ad269ea5d6c5744d82f6a7328746 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:38:33 -0800
Subject: d_path: Use struct path in struct avc_audit_data

audit_log_d_path() is a d_path() wrapper that is used by the audit code.  To
use a struct path in audit_log_d_path() I need to embed it into struct
avc_audit_data.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jan Blunck <jblunck@suse.de>
Acked-by: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h          |  5 ++---
 kernel/audit.c                 | 12 ++++++------
 kernel/auditsc.c               | 28 +++++++++++-----------------
 security/selinux/avc.c         | 15 ++++++++++-----
 security/selinux/hooks.c       | 28 ++++++++++++----------------
 security/selinux/include/avc.h |  6 ++----
 6 files changed, 43 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 97153027207a..2af9ec025015 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -534,8 +534,7 @@ extern void		    audit_log_n_untrustedstring(struct audit_buffer *ab,
 							const char *string);
 extern void		    audit_log_d_path(struct audit_buffer *ab,
 					     const char *prefix,
-					     struct dentry *dentry,
-					     struct vfsmount *vfsmnt);
+					     struct path *path);
 extern void		    audit_log_lost(const char *message);
 				/* Private API (for audit.c only) */
 extern int audit_filter_user(struct netlink_skb_parms *cb, int type);
@@ -552,7 +551,7 @@ extern int audit_enabled;
 #define audit_log_hex(a,b,l) do { ; } while (0)
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
 #define audit_log_n_untrustedstring(a,n,s) do { ; } while (0)
-#define audit_log_d_path(b,p,d,v) do { ; } while (0)
+#define audit_log_d_path(b, p, d) do { ; } while (0)
 #define audit_enabled 0
 #endif
 #endif
diff --git a/kernel/audit.c b/kernel/audit.c
index c8555b180213..783e65701247 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1312,26 +1312,26 @@ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
 
 /* This is a helper-function to print the escaped d_path */
 void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
-		      struct dentry *dentry, struct vfsmount *vfsmnt)
+		      struct path *path)
 {
-	char *p, *path;
+	char *p, *pathname;
 
 	if (prefix)
 		audit_log_format(ab, " %s", prefix);
 
 	/* We will allow 11 spaces for ' (deleted)' to be appended */
-	path = kmalloc(PATH_MAX+11, ab->gfp_mask);
-	if (!path) {
+	pathname = kmalloc(PATH_MAX+11, ab->gfp_mask);
+	if (!pathname) {
 		audit_log_format(ab, "<no memory>");
 		return;
 	}
-	p = d_path(dentry, vfsmnt, path, PATH_MAX+11);
+	p = d_path(path->dentry, path->mnt, pathname, PATH_MAX+11);
 	if (IS_ERR(p)) { /* Should never happen since we send PATH_MAX */
 		/* FIXME: can we save some information here? */
 		audit_log_format(ab, "<too long>");
 	} else
 		audit_log_untrustedstring(ab, p);
-	kfree(path);
+	kfree(pathname);
 }
 
 /**
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 741291a1de0d..ac6d9b23b018 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -208,8 +208,7 @@ struct audit_context {
 	int		    name_count;
 	struct audit_names  names[AUDIT_NAMES];
 	char *		    filterkey;	/* key for rule that triggered record */
-	struct dentry *	    pwd;
-	struct vfsmount *   pwdmnt;
+	struct path	    pwd;
 	struct audit_context *previous; /* For nested syscalls */
 	struct audit_aux_data *aux;
 	struct audit_aux_data *aux_pids;
@@ -786,12 +785,9 @@ static inline void audit_free_names(struct audit_context *context)
 			__putname(context->names[i].name);
 	}
 	context->name_count = 0;
-	if (context->pwd)
-		dput(context->pwd);
-	if (context->pwdmnt)
-		mntput(context->pwdmnt);
-	context->pwd = NULL;
-	context->pwdmnt = NULL;
+	path_put(&context->pwd);
+	context->pwd.dentry = NULL;
+	context->pwd.mnt = NULL;
 }
 
 static inline void audit_free_aux(struct audit_context *context)
@@ -930,8 +926,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
 			if ((vma->vm_flags & VM_EXECUTABLE) &&
 			    vma->vm_file) {
 				audit_log_d_path(ab, "exe=",
-						 vma->vm_file->f_path.dentry,
-						 vma->vm_file->f_path.mnt);
+						 &vma->vm_file->f_path);
 				break;
 			}
 			vma = vma->vm_next;
@@ -1341,10 +1336,10 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				  context->target_sid, context->target_comm))
 			call_panic = 1;
 
-	if (context->pwd && context->pwdmnt) {
+	if (context->pwd.dentry && context->pwd.mnt) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
 		if (ab) {
-			audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
+			audit_log_d_path(ab, "cwd=", &context->pwd);
 			audit_log_end(ab);
 		}
 	}
@@ -1367,8 +1362,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			case 0:
 				/* name was specified as a relative path and the
 				 * directory component is the cwd */
-				audit_log_d_path(ab, " name=", context->pwd,
-						 context->pwdmnt);
+				audit_log_d_path(ab, " name=", &context->pwd);
 				break;
 			default:
 				/* log the name's directory component */
@@ -1695,10 +1689,10 @@ void __audit_getname(const char *name)
 	context->names[context->name_count].ino  = (unsigned long)-1;
 	context->names[context->name_count].osid = 0;
 	++context->name_count;
-	if (!context->pwd) {
+	if (!context->pwd.dentry) {
 		read_lock(&current->fs->lock);
-		context->pwd = dget(current->fs->pwd.dentry);
-		context->pwdmnt = mntget(current->fs->pwd.mnt);
+		context->pwd = current->fs->pwd;
+		path_get(&current->fs->pwd);
 		read_unlock(&current->fs->lock);
 	}
 
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index e8529e2f51e5..187964e88af1 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -568,10 +568,11 @@ void avc_audit(u32 ssid, u32 tsid,
 			audit_log_format(ab, " capability=%d", a->u.cap);
 			break;
 		case AVC_AUDIT_DATA_FS:
-			if (a->u.fs.dentry) {
-				struct dentry *dentry = a->u.fs.dentry;
-				if (a->u.fs.mnt) {
-					audit_log_d_path(ab, "path=", dentry, a->u.fs.mnt);
+			if (a->u.fs.path.dentry) {
+				struct dentry *dentry = a->u.fs.path.dentry;
+				if (a->u.fs.path.mnt) {
+					audit_log_d_path(ab, "path=",
+							 &a->u.fs.path);
 				} else {
 					audit_log_format(ab, " name=");
 					audit_log_untrustedstring(ab, dentry->d_name.name);
@@ -626,8 +627,12 @@ void avc_audit(u32 ssid, u32 tsid,
 				case AF_UNIX:
 					u = unix_sk(sk);
 					if (u->dentry) {
+						struct path path = {
+							.dentry = u->dentry,
+							.mnt = u->mnt
+						};
 						audit_log_d_path(ab, "path=",
-								 u->dentry, u->mnt);
+								 &path);
 						break;
 					}
 					if (!u->addr)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ffeefa3c2c77..75c2e99bfb81 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1356,8 +1356,8 @@ static inline int dentry_has_perm(struct task_struct *tsk,
 	struct inode *inode = dentry->d_inode;
 	struct avc_audit_data ad;
 	AVC_AUDIT_DATA_INIT(&ad,FS);
-	ad.u.fs.mnt = mnt;
-	ad.u.fs.dentry = dentry;
+	ad.u.fs.path.mnt = mnt;
+	ad.u.fs.path.dentry = dentry;
 	return inode_has_perm(tsk, inode, av, &ad);
 }
 
@@ -1375,15 +1375,12 @@ static int file_has_perm(struct task_struct *tsk,
 {
 	struct task_security_struct *tsec = tsk->security;
 	struct file_security_struct *fsec = file->f_security;
-	struct vfsmount *mnt = file->f_path.mnt;
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct avc_audit_data ad;
 	int rc;
 
 	AVC_AUDIT_DATA_INIT(&ad, FS);
-	ad.u.fs.mnt = mnt;
-	ad.u.fs.dentry = dentry;
+	ad.u.fs.path = file->f_path;
 
 	if (tsec->sid != fsec->sid) {
 		rc = avc_has_perm(tsec->sid, fsec->sid,
@@ -1418,7 +1415,7 @@ static int may_create(struct inode *dir,
 	sbsec = dir->i_sb->s_security;
 
 	AVC_AUDIT_DATA_INIT(&ad, FS);
-	ad.u.fs.dentry = dentry;
+	ad.u.fs.path.dentry = dentry;
 
 	rc = avc_has_perm(tsec->sid, dsec->sid, SECCLASS_DIR,
 			  DIR__ADD_NAME | DIR__SEARCH,
@@ -1476,7 +1473,7 @@ static int may_link(struct inode *dir,
 	isec = dentry->d_inode->i_security;
 
 	AVC_AUDIT_DATA_INIT(&ad, FS);
-	ad.u.fs.dentry = dentry;
+	ad.u.fs.path.dentry = dentry;
 
 	av = DIR__SEARCH;
 	av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME);
@@ -1523,7 +1520,7 @@ static inline int may_rename(struct inode *old_dir,
 
 	AVC_AUDIT_DATA_INIT(&ad, FS);
 
-	ad.u.fs.dentry = old_dentry;
+	ad.u.fs.path.dentry = old_dentry;
 	rc = avc_has_perm(tsec->sid, old_dsec->sid, SECCLASS_DIR,
 			  DIR__REMOVE_NAME | DIR__SEARCH, &ad);
 	if (rc)
@@ -1539,7 +1536,7 @@ static inline int may_rename(struct inode *old_dir,
 			return rc;
 	}
 
-	ad.u.fs.dentry = new_dentry;
+	ad.u.fs.path.dentry = new_dentry;
 	av = DIR__ADD_NAME | DIR__SEARCH;
 	if (new_dentry->d_inode)
 		av |= DIR__REMOVE_NAME;
@@ -1918,8 +1915,7 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm)
 	}
 
 	AVC_AUDIT_DATA_INIT(&ad, FS);
-	ad.u.fs.mnt = bprm->file->f_path.mnt;
-	ad.u.fs.dentry = bprm->file->f_path.dentry;
+	ad.u.fs.path = bprm->file->f_path;
 
 	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
 		newsid = tsec->sid;
@@ -2315,7 +2311,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, void *data)
 		return rc;
 
 	AVC_AUDIT_DATA_INIT(&ad,FS);
-	ad.u.fs.dentry = sb->s_root;
+	ad.u.fs.path.dentry = sb->s_root;
 	return superblock_has_perm(current, sb, FILESYSTEM__MOUNT, &ad);
 }
 
@@ -2324,7 +2320,7 @@ static int selinux_sb_statfs(struct dentry *dentry)
 	struct avc_audit_data ad;
 
 	AVC_AUDIT_DATA_INIT(&ad,FS);
-	ad.u.fs.dentry = dentry->d_sb->s_root;
+	ad.u.fs.path.dentry = dentry->d_sb->s_root;
 	return superblock_has_perm(current, dentry->d_sb, FILESYSTEM__GETATTR, &ad);
 }
 
@@ -2587,7 +2583,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, char *name, void *value
 		return -EPERM;
 
 	AVC_AUDIT_DATA_INIT(&ad,FS);
-	ad.u.fs.dentry = dentry;
+	ad.u.fs.path.dentry = dentry;
 
 	rc = avc_has_perm(tsec->sid, isec->sid, isec->sclass,
 			  FILE__RELABELFROM, &ad);
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index 80c28fa6621c..8e23d7a873a4 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/in6.h>
+#include <linux/path.h>
 #include <asm/system.h>
 #include "flask.h"
 #include "av_permissions.h"
@@ -30,8 +31,6 @@ extern int selinux_enforcing;
 struct avc_entry;
 
 struct task_struct;
-struct vfsmount;
-struct dentry;
 struct inode;
 struct sock;
 struct sk_buff;
@@ -46,8 +45,7 @@ struct avc_audit_data {
 	struct task_struct *tsk;
 	union 	{
 		struct {
-			struct vfsmount *mnt;
-			struct dentry *dentry;
+			struct path path;
 			struct inode *inode;
 		} fs;
 		struct {
-- 
cgit 


From 3dcd25f37cfe2943beca93f41f50994108248a60 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:38:35 -0800
Subject: d_path: Make proc_get_link() use a struct path argument

proc_get_link() is always called with a dentry and a vfsmount from a struct
path.  Make proc_get_link() take it directly as an argument.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Acked-by: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c          | 60 +++++++++++++++++++++----------------------------
 fs/proc/internal.h      |  2 +-
 fs/proc/task_mmu.c      |  6 ++---
 fs/proc/task_nommu.c    |  6 ++---
 include/linux/proc_fs.h |  2 +-
 5 files changed, 34 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 080f1f6eda61..47338d92db51 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -153,7 +153,7 @@ static int get_nr_threads(struct task_struct *tsk)
 	return count;
 }
 
-static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+static int proc_cwd_link(struct inode *inode, struct path *path)
 {
 	struct task_struct *task = get_proc_task(inode);
 	struct fs_struct *fs = NULL;
@@ -165,8 +165,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
 	}
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->pwd.mnt);
-		*dentry = dget(fs->pwd.dentry);
+		*path = fs->pwd;
+		path_get(&fs->pwd);
 		read_unlock(&fs->lock);
 		result = 0;
 		put_fs_struct(fs);
@@ -174,7 +174,7 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
 	return result;
 }
 
-static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+static int proc_root_link(struct inode *inode, struct path *path)
 {
 	struct task_struct *task = get_proc_task(inode);
 	struct fs_struct *fs = NULL;
@@ -186,8 +186,8 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
 	}
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->root.mnt);
-		*dentry = dget(fs->root.dentry);
+		*path = fs->root;
+		path_get(&fs->root);
 		read_unlock(&fs->lock);
 		result = 0;
 		put_fs_struct(fs);
@@ -1170,34 +1170,30 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(inode, &nd->path.dentry,
-						&nd->path.mnt);
+	error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
 	nd->last_type = LAST_BIND;
 out:
 	return ERR_PTR(error);
 }
 
-static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
-			    char __user *buffer, int buflen)
+static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
 {
-	struct inode * inode;
 	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
-	char *path;
+	char *pathname;
 	int len;
 
 	if (!tmp)
 		return -ENOMEM;
 
-	inode = dentry->d_inode;
-	path = d_path(dentry, mnt, tmp, PAGE_SIZE);
-	len = PTR_ERR(path);
-	if (IS_ERR(path))
+	pathname = d_path(path->dentry, path->mnt, tmp, PAGE_SIZE);
+	len = PTR_ERR(pathname);
+	if (IS_ERR(pathname))
 		goto out;
-	len = tmp + PAGE_SIZE - 1 - path;
+	len = tmp + PAGE_SIZE - 1 - pathname;
 
 	if (len > buflen)
 		len = buflen;
-	if (copy_to_user(buffer, path, len))
+	if (copy_to_user(buffer, pathname, len))
 		len = -EFAULT;
  out:
 	free_page((unsigned long)tmp);
@@ -1208,20 +1204,18 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
 {
 	int error = -EACCES;
 	struct inode *inode = dentry->d_inode;
-	struct dentry *de;
-	struct vfsmount *mnt = NULL;
+	struct path path;
 
 	/* Are we allowed to snoop on the tasks file descriptors? */
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
+	error = PROC_I(inode)->op.proc_get_link(inode, &path);
 	if (error)
 		goto out;
 
-	error = do_proc_readlink(de, mnt, buffer, buflen);
-	dput(de);
-	mntput(mnt);
+	error = do_proc_readlink(&path, buffer, buflen);
+	path_put(&path);
 out:
 	return error;
 }
@@ -1448,8 +1442,7 @@ out:
 
 #define PROC_FDINFO_MAX 64
 
-static int proc_fd_info(struct inode *inode, struct dentry **dentry,
-			struct vfsmount **mnt, char *info)
+static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 {
 	struct task_struct *task = get_proc_task(inode);
 	struct files_struct *files = NULL;
@@ -1468,10 +1461,10 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry,
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
-			if (mnt)
-				*mnt = mntget(file->f_path.mnt);
-			if (dentry)
-				*dentry = dget(file->f_path.dentry);
+			if (path) {
+				*path = file->f_path;
+				path_get(&file->f_path);
+			}
 			if (info)
 				snprintf(info, PROC_FDINFO_MAX,
 					 "pos:\t%lli\n"
@@ -1488,10 +1481,9 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry,
 	return -ENOENT;
 }
 
-static int proc_fd_link(struct inode *inode, struct dentry **dentry,
-			struct vfsmount **mnt)
+static int proc_fd_link(struct inode *inode, struct path *path)
 {
-	return proc_fd_info(inode, dentry, mnt, NULL);
+	return proc_fd_info(inode, path, NULL);
 }
 
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1685,7 +1677,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
 				      size_t len, loff_t *ppos)
 {
 	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, NULL, tmp);
+	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
 	if (!err)
 		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
 	return err;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index ea496ffeabe7..1c81c8f1aeed 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -48,7 +48,7 @@ extern int maps_protect;
 
 extern void create_seq_entry(char *name, mode_t mode,
 				const struct file_operations *f);
-extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **);
+extern int proc_exe_link(struct inode *, struct path *);
 extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *task);
 extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ae4d3f2c8cb2..4c4f99fb1bfc 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,7 +75,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
 	return mm->total_vm;
 }
 
-int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+int proc_exe_link(struct inode *inode, struct path *path)
 {
 	struct vm_area_struct * vma;
 	int result = -ENOENT;
@@ -98,8 +98,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_path.mnt);
-		*dentry = dget(vma->vm_file->f_path.dentry);
+		*path = vma->vm_file->f_path;
+		path_get(&vma->vm_file->f_path);
 		result = 0;
 	}
 
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index abfc6f5e56ca..8011528518bd 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -103,7 +103,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
 	return size;
 }
 
-int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+int proc_exe_link(struct inode *inode, struct path *path)
 {
 	struct vm_list_struct *vml;
 	struct vm_area_struct *vma;
@@ -126,8 +126,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_path.mnt);
-		*dentry = dget(vma->vm_file->f_path.dentry);
+		*path = vma->vm_file->f_path;
+		path_get(&vma->vm_file->f_path);
 		result = 0;
 	}
 
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index d6a4f69bdc92..d9a9e718ad19 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -269,7 +269,7 @@ extern void kclist_add(struct kcore_list *, void *, size_t);
 #endif
 
 union proc_op {
-	int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
+	int (*proc_get_link)(struct inode *, struct path *);
 	int (*proc_read)(struct task_struct *task, char *page);
 	int (*proc_show)(struct seq_file *m,
 		struct pid_namespace *ns, struct pid *pid,
-- 
cgit 


From 448678a0f3cdd0157f00e98bd337e32030273637 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:38:36 -0800
Subject: d_path: Make get_dcookie() use a struct path argument

get_dcookie() is always called with a dentry and a vfsmount from a struct
path.  Make get_dcookie() take it directly as an argument.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jan Blunck <jblunck@suse.de>
Acked-by: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/oprofile/cell/spu_task_sync.c | 15 ++++++-------
 drivers/oprofile/buffer_sync.c             | 21 ++++++++----------
 fs/dcookies.c                              | 34 +++++++++++++-----------------
 include/linux/dcookies.h                   | 15 ++++++-------
 4 files changed, 37 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 4a890cb42b98..257b13cb18af 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -198,14 +198,13 @@ out:
  * dcookie user still being registered (namely, the reader
  * of the event buffer).
  */
-static inline unsigned long fast_get_dcookie(struct dentry *dentry,
-					     struct vfsmount *vfsmnt)
+static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (dentry->d_cookie)
-		return (unsigned long)dentry;
-	get_dcookie(dentry, vfsmnt, &cookie);
+	if (path->dentry->d_cookie)
+		return (unsigned long)path->dentry;
+	get_dcookie(path, &cookie);
 	return cookie;
 }
 
@@ -240,8 +239,7 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 			continue;
 		if (!(vma->vm_flags & VM_EXECUTABLE))
 			continue;
-		app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
-					  vma->vm_file->f_vfsmnt);
+		app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
 		pr_debug("got dcookie for %s\n",
 			 vma->vm_file->f_dentry->d_name.name);
 		app = vma->vm_file;
@@ -262,8 +260,7 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 		break;
 	}
 
-	*spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
-						 vma->vm_file->f_vfsmnt);
+	*spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path);
 	pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
 
 	up_read(&mm->mmap_sem);
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 8134c7e198a5..b07ba2a14119 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -187,23 +187,22 @@ void sync_stop(void)
 	end_sync();
 }
 
- 
+
 /* Optimisation. We can manage without taking the dcookie sem
  * because we cannot reach this code without at least one
  * dcookie user still being registered (namely, the reader
  * of the event buffer). */
-static inline unsigned long fast_get_dcookie(struct dentry * dentry,
-	struct vfsmount * vfsmnt)
+static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
- 
-	if (dentry->d_cookie)
-		return (unsigned long)dentry;
-	get_dcookie(dentry, vfsmnt, &cookie);
+
+	if (path->dentry->d_cookie)
+		return (unsigned long)path->dentry;
+	get_dcookie(path, &cookie);
 	return cookie;
 }
 
- 
+
 /* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
  * which corresponds loosely to "application name". This is
  * not strictly necessary but allows oprofile to associate
@@ -222,8 +221,7 @@ static unsigned long get_exec_dcookie(struct mm_struct * mm)
 			continue;
 		if (!(vma->vm_flags & VM_EXECUTABLE))
 			continue;
-		cookie = fast_get_dcookie(vma->vm_file->f_path.dentry,
-			vma->vm_file->f_path.mnt);
+		cookie = fast_get_dcookie(&vma->vm_file->f_path);
 		break;
 	}
 
@@ -248,8 +246,7 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o
 			continue;
 
 		if (vma->vm_file) {
-			cookie = fast_get_dcookie(vma->vm_file->f_path.dentry,
-				vma->vm_file->f_path.mnt);
+			cookie = fast_get_dcookie(&vma->vm_file->f_path);
 			*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr -
 				vma->vm_start;
 		} else {
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 792cbf55fa95..13c29f1f711f 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -24,6 +24,7 @@
 #include <linux/errno.h>
 #include <linux/dcookies.h>
 #include <linux/mutex.h>
+#include <linux/path.h>
 #include <asm/uaccess.h>
 
 /* The dcookies are allocated from a kmem_cache and
@@ -31,8 +32,7 @@
  * code here is particularly performance critical
  */
 struct dcookie_struct {
-	struct dentry * dentry;
-	struct vfsmount * vfsmnt;
+	struct path path;
 	struct list_head hash_list;
 };
 
@@ -51,7 +51,7 @@ static inline int is_live(void)
 /* The dentry is locked, its address will do for the cookie */
 static inline unsigned long dcookie_value(struct dcookie_struct * dcs)
 {
-	return (unsigned long)dcs->dentry;
+	return (unsigned long)dcs->path.dentry;
 }
 
 
@@ -89,19 +89,17 @@ static void hash_dcookie(struct dcookie_struct * dcs)
 }
 
 
-static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
-	struct vfsmount * vfsmnt)
+static struct dcookie_struct *alloc_dcookie(struct path *path)
 {
-	struct dcookie_struct * dcs = kmem_cache_alloc(dcookie_cache, GFP_KERNEL);
+	struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
+							GFP_KERNEL);
 	if (!dcs)
 		return NULL;
 
-	dentry->d_cookie = dcs;
-
-	dcs->dentry = dget(dentry);
-	dcs->vfsmnt = mntget(vfsmnt);
+	path->dentry->d_cookie = dcs;
+	dcs->path = *path;
+	path_get(path);
 	hash_dcookie(dcs);
-
 	return dcs;
 }
 
@@ -109,8 +107,7 @@ static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
 /* This is the main kernel-side routine that retrieves the cookie
  * value for a dentry/vfsmnt pair.
  */
-int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
-	unsigned long * cookie)
+int get_dcookie(struct path *path, unsigned long *cookie)
 {
 	int err = 0;
 	struct dcookie_struct * dcs;
@@ -122,10 +119,10 @@ int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
 		goto out;
 	}
 
-	dcs = dentry->d_cookie;
+	dcs = path->dentry->d_cookie;
 
 	if (!dcs)
-		dcs = alloc_dcookie(dentry, vfsmnt);
+		dcs = alloc_dcookie(path);
 
 	if (!dcs) {
 		err = -ENOMEM;
@@ -174,7 +171,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len)
 		goto out;
 
 	/* FIXME: (deleted) ? */
-	path = d_path(dcs->dentry, dcs->vfsmnt, kbuf, PAGE_SIZE);
+	path = d_path(dcs->path.dentry, dcs->path.mnt, kbuf, PAGE_SIZE);
 
 	if (IS_ERR(path)) {
 		err = PTR_ERR(path);
@@ -254,9 +251,8 @@ out_kmem:
 
 static void free_dcookie(struct dcookie_struct * dcs)
 {
-	dcs->dentry->d_cookie = NULL;
-	dput(dcs->dentry);
-	mntput(dcs->vfsmnt);
+	dcs->path.dentry->d_cookie = NULL;
+	path_put(&dcs->path);
 	kmem_cache_free(dcookie_cache, dcs);
 }
 
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
index 98c69ab80c84..24c806f12a6c 100644
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -13,6 +13,7 @@
 #ifdef CONFIG_PROFILING
  
 #include <linux/dcache.h>
+#include <linux/path.h>
 #include <linux/types.h>
  
 struct dcookie_user;
@@ -43,8 +44,7 @@ void dcookie_unregister(struct dcookie_user * user);
  *
  * Returns 0 on success, with *cookie filled in
  */
-int get_dcookie(struct dentry * dentry, struct vfsmount * vfsmnt,
-	unsigned long * cookie);
+int get_dcookie(struct path *path, unsigned long *cookie);
 
 #else
 
@@ -57,13 +57,12 @@ static inline void dcookie_unregister(struct dcookie_user * user)
 {
 	return;
 }
- 
-static inline int get_dcookie(struct dentry * dentry,
-	struct vfsmount * vfsmnt, unsigned long * cookie)
+
+static inline int get_dcookie(struct path *path, unsigned long *cookie)
 {
 	return -ENOSYS;
-} 
- 
+}
+
 #endif /* CONFIG_PROFILING */
- 
+
 #endif /* DCOOKIES_H */
-- 
cgit 


From 5477549161480432d053565d2720f08626baf9e3 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:38:39 -0800
Subject: Use struct path in struct svc_export

I'm embedding struct path into struct svc_export.

[akpm@linux-foundation.org: coding-style fixes]
[ezk@cs.sunysb.edu: NFSD: fix wrong mnt_writer count in rename]
Signed-off-by: Jan Blunck <jblunck@suse.de>
Acked-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/export.c            | 67 ++++++++++++++++++++++-----------------------
 fs/nfsd/nfs3proc.c          |  2 +-
 fs/nfsd/nfs3xdr.c           |  4 +--
 fs/nfsd/nfs4xdr.c           | 12 ++++----
 fs/nfsd/nfsfh.c             | 26 +++++++++---------
 fs/nfsd/nfsproc.c           |  6 ++--
 fs/nfsd/nfsxdr.c            |  2 +-
 fs/nfsd/vfs.c               | 13 +++++----
 include/linux/nfsd/export.h |  5 ++--
 9 files changed, 67 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 717413f07e9a..7d7896814fa4 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -332,10 +332,9 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
 static void svc_export_put(struct kref *ref)
 {
 	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
-	dput(exp->ex_dentry);
-	mntput(exp->ex_mnt);
+	path_put(&exp->ex_path);
 	auth_domain_put(exp->ex_client);
-	kfree(exp->ex_path);
+	kfree(exp->ex_pathname);
 	nfsd4_fslocs_free(&exp->ex_fslocs);
 	kfree(exp);
 }
@@ -349,7 +348,7 @@ static void svc_export_request(struct cache_detail *cd,
 	char *pth;
 
 	qword_add(bpp, blen, exp->ex_client->name);
-	pth = d_path(exp->ex_dentry, exp->ex_mnt, *bpp, *blen);
+	pth = d_path(exp->ex_path.dentry, exp->ex_path.mnt, *bpp, *blen);
 	if (IS_ERR(pth)) {
 		/* is this correct? */
 		(*bpp)[0] = '\n';
@@ -508,7 +507,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 	int an_int;
 
 	nd.path.dentry = NULL;
-	exp.ex_path = NULL;
+	exp.ex_pathname = NULL;
 
 	/* fs locations */
 	exp.ex_fslocs.locations = NULL;
@@ -547,11 +546,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 
 	exp.h.flags = 0;
 	exp.ex_client = dom;
-	exp.ex_mnt = nd.path.mnt;
-	exp.ex_dentry = nd.path.dentry;
-	exp.ex_path = kstrdup(buf, GFP_KERNEL);
+	exp.ex_path.mnt = nd.path.mnt;
+	exp.ex_path.dentry = nd.path.dentry;
+	exp.ex_pathname = kstrdup(buf, GFP_KERNEL);
 	err = -ENOMEM;
-	if (!exp.ex_path)
+	if (!exp.ex_pathname)
 		goto out;
 
 	/* expiry */
@@ -628,7 +627,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
  out:
 	nfsd4_fslocs_free(&exp.ex_fslocs);
 	kfree(exp.ex_uuid);
- 	kfree(exp.ex_path);
+	kfree(exp.ex_pathname);
 	if (nd.path.dentry)
 		path_put(&nd.path);
  out_no_path:
@@ -653,7 +652,7 @@ static int svc_export_show(struct seq_file *m,
 		return 0;
 	}
 	exp = container_of(h, struct svc_export, h);
-	seq_path(m, exp->ex_mnt, exp->ex_dentry, " \t\n\\");
+	seq_path(m, exp->ex_path.mnt, exp->ex_path.dentry, " \t\n\\");
 	seq_putc(m, '\t');
 	seq_escape(m, exp->ex_client->name, " \t\n\\");
 	seq_putc(m, '(');
@@ -680,8 +679,8 @@ static int svc_export_match(struct cache_head *a, struct cache_head *b)
 	struct svc_export *orig = container_of(a, struct svc_export, h);
 	struct svc_export *new = container_of(b, struct svc_export, h);
 	return orig->ex_client == new->ex_client &&
-		orig->ex_dentry == new->ex_dentry &&
-		orig->ex_mnt == new->ex_mnt;
+		orig->ex_path.dentry == new->ex_path.dentry &&
+		orig->ex_path.mnt == new->ex_path.mnt;
 }
 
 static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
@@ -691,9 +690,9 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 
 	kref_get(&item->ex_client->ref);
 	new->ex_client = item->ex_client;
-	new->ex_dentry = dget(item->ex_dentry);
-	new->ex_mnt = mntget(item->ex_mnt);
-	new->ex_path = NULL;
+	new->ex_path.dentry = dget(item->ex_path.dentry);
+	new->ex_path.mnt = mntget(item->ex_path.mnt);
+	new->ex_pathname = NULL;
 	new->ex_fslocs.locations = NULL;
 	new->ex_fslocs.locations_count = 0;
 	new->ex_fslocs.migrated = 0;
@@ -711,8 +710,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
 	new->ex_fsid = item->ex_fsid;
 	new->ex_uuid = item->ex_uuid;
 	item->ex_uuid = NULL;
-	new->ex_path = item->ex_path;
-	item->ex_path = NULL;
+	new->ex_pathname = item->ex_pathname;
+	item->ex_pathname = NULL;
 	new->ex_fslocs.locations = item->ex_fslocs.locations;
 	item->ex_fslocs.locations = NULL;
 	new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
@@ -755,8 +754,8 @@ svc_export_lookup(struct svc_export *exp)
 	struct cache_head *ch;
 	int hash;
 	hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
-	hash ^= hash_ptr(exp->ex_dentry, EXPORT_HASHBITS);
-	hash ^= hash_ptr(exp->ex_mnt, EXPORT_HASHBITS);
+	hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
+	hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
 
 	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
 				 hash);
@@ -772,8 +771,8 @@ svc_export_update(struct svc_export *new, struct svc_export *old)
 	struct cache_head *ch;
 	int hash;
 	hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
-	hash ^= hash_ptr(old->ex_dentry, EXPORT_HASHBITS);
-	hash ^= hash_ptr(old->ex_mnt, EXPORT_HASHBITS);
+	hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
+	hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
 
 	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
 				 &old->h,
@@ -815,8 +814,8 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	key.ek_client = clp;
 	key.ek_fsidtype = fsid_type;
 	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
-	key.ek_mnt = exp->ex_mnt;
-	key.ek_dentry = exp->ex_dentry;
+	key.ek_mnt = exp->ex_path.mnt;
+	key.ek_dentry = exp->ex_path.dentry;
 	key.h.expiry_time = NEVER;
 	key.h.flags = 0;
 
@@ -870,8 +869,8 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
 		return ERR_PTR(-ENOENT);
 
 	key.ex_client = clp;
-	key.ex_mnt = mnt;
-	key.ex_dentry = dentry;
+	key.ex_path.mnt = mnt;
+	key.ex_path.dentry = dentry;
 
 	exp = svc_export_lookup(&key);
 	if (exp == NULL)
@@ -968,7 +967,7 @@ static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
 static int exp_hash(struct auth_domain *clp, struct svc_export *exp)
 {
 	u32 fsid[2];
-	struct inode *inode = exp->ex_dentry->d_inode;
+	struct inode *inode = exp->ex_path.dentry->d_inode;
 	dev_t dev = inode->i_sb->s_dev;
 
 	if (old_valid_dev(dev)) {
@@ -982,7 +981,7 @@ static int exp_hash(struct auth_domain *clp, struct svc_export *exp)
 static void exp_unhash(struct svc_export *exp)
 {
 	struct svc_expkey *ek;
-	struct inode *inode = exp->ex_dentry->d_inode;
+	struct inode *inode = exp->ex_path.dentry->d_inode;
 
 	ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
 	if (!IS_ERR(ek)) {
@@ -1064,12 +1063,11 @@ exp_export(struct nfsctl_export *nxp)
 
 	new.h.expiry_time = NEVER;
 	new.h.flags = 0;
-	new.ex_path = kstrdup(nxp->ex_path, GFP_KERNEL);
-	if (!new.ex_path)
+	new.ex_pathname = kstrdup(nxp->ex_path, GFP_KERNEL);
+	if (!new.ex_pathname)
 		goto finish;
 	new.ex_client = clp;
-	new.ex_mnt = nd.path.mnt;
-	new.ex_dentry = nd.path.dentry;
+	new.ex_path = nd.path;
 	new.ex_flags = nxp->ex_flags;
 	new.ex_anon_uid = nxp->ex_anon_uid;
 	new.ex_anon_gid = nxp->ex_anon_gid;
@@ -1090,8 +1088,7 @@ exp_export(struct nfsctl_export *nxp)
 	} else
 		err = 0;
 finish:
-	if (new.ex_path)
-		kfree(new.ex_path);
+	kfree(new.ex_pathname);
 	if (exp)
 		exp_put(exp);
 	if (fsid_key && !IS_ERR(fsid_key))
@@ -1360,7 +1357,7 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
 	exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
 	if (IS_ERR(exp))
 		return nfserrno(PTR_ERR(exp));
-	rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
+	rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
 	if (rv)
 		goto out;
 	rv = check_nfsd_access(exp, rqstp);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index eac82830bfd7..c721a1e6e9dd 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -67,7 +67,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 	if (nfserr)
 		RETURN_STATUS(nfserr);
 
-	err = vfs_getattr(resp->fh.fh_export->ex_mnt,
+	err = vfs_getattr(resp->fh.fh_export->ex_path.mnt,
 			  resp->fh.fh_dentry, &resp->stat);
 	nfserr = nfserrno(err);
 
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d7647f70e02b..17d0dd997204 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -218,7 +218,7 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 	        int err;
 		struct kstat stat;
 
-		err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat);
+		err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat);
 		if (!err) {
 			*p++ = xdr_one;		/* attributes follow */
 			lease_get_mtime(dentry->d_inode, &stat.mtime);
@@ -270,7 +270,7 @@ void fill_post_wcc(struct svc_fh *fhp)
 	if (fhp->fh_post_saved)
 		printk("nfsd: inode locked twice during operation.\n");
 
-	err = vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry,
+	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
 			&fhp->fh_post_attr);
 	if (err)
 		fhp->fh_post_saved = 0;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b0592e7c378d..0e6a179eccaf 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1330,9 +1330,9 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
 	*stat = exp_pseudoroot(rqstp, &tmp_fh);
 	if (*stat)
 		return NULL;
-	rootpath = tmp_fh.fh_export->ex_path;
+	rootpath = tmp_fh.fh_export->ex_pathname;
 
-	path = exp->ex_path;
+	path = exp->ex_pathname;
 
 	if (strncmp(path, rootpath, strlen(rootpath))) {
 		dprintk("nfsd: fs_locations failed;"
@@ -1481,7 +1481,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			goto out;
 	}
 
-	err = vfs_getattr(exp->ex_mnt, dentry, &stat);
+	err = vfs_getattr(exp->ex_path.mnt, dentry, &stat);
 	if (err)
 		goto out_nfserr;
 	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
@@ -1838,9 +1838,9 @@ out_acl:
 		 * and this is the root of a cross-mounted filesystem.
 		 */
 		if (ignore_crossmnt == 0 &&
-		    exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) {
-			err = vfs_getattr(exp->ex_mnt->mnt_parent,
-				exp->ex_mnt->mnt_mountpoint, &stat);
+		    exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) {
+			err = vfs_getattr(exp->ex_path.mnt->mnt_parent,
+				exp->ex_path.mnt->mnt_mountpoint, &stat);
 			if (err)
 				goto out_nfserr;
 		}
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8fbd2dc08a92..0130b345234d 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
 		return 1;
 
 	tdentry = dget(dentry);
-	while (tdentry != exp->ex_dentry && ! IS_ROOT(tdentry)) {
+	while (tdentry != exp->ex_path.dentry && !IS_ROOT(tdentry)) {
 		/* make sure parents give x permission to user */
 		int err;
 		parent = dget_parent(tdentry);
@@ -59,9 +59,9 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
 		dput(tdentry);
 		tdentry = parent;
 	}
-	if (tdentry != exp->ex_dentry)
+	if (tdentry != exp->ex_path.dentry)
 		dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name);
-	rv = (tdentry == exp->ex_dentry);
+	rv = (tdentry == exp->ex_path.dentry);
 	dput(tdentry);
 	return rv;
 }
@@ -209,9 +209,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 			fileid_type = fh->fh_fileid_type;
 
 		if (fileid_type == FILEID_ROOT)
-			dentry = dget(exp->ex_dentry);
+			dentry = dget(exp->ex_path.dentry);
 		else {
-			dentry = exportfs_decode_fh(exp->ex_mnt, fid,
+			dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
 					data_left, fileid_type,
 					nfsd_acceptable, exp);
 		}
@@ -299,7 +299,7 @@ out:
 static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
 		struct dentry *dentry)
 {
-	if (dentry != exp->ex_dentry) {
+	if (dentry != exp->ex_path.dentry) {
 		struct fid *fid = (struct fid *)
 			(fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1);
 		int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
@@ -344,12 +344,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	struct inode * inode = dentry->d_inode;
 	struct dentry *parent = dentry->d_parent;
 	__u32 *datap;
-	dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev;
-	int root_export = (exp->ex_dentry == exp->ex_dentry->d_sb->s_root);
+	dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev;
+	int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root);
 
 	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
 		MAJOR(ex_dev), MINOR(ex_dev),
-		(long) exp->ex_dentry->d_inode->i_ino,
+		(long) exp->ex_path.dentry->d_inode->i_ino,
 		parent->d_name.name, dentry->d_name.name,
 		(inode ? inode->i_ino : 0));
 
@@ -391,7 +391,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 			/* FALL THROUGH */
 		case FSID_MAJOR_MINOR:
 		case FSID_ENCODE_DEV:
-			if (!(exp->ex_dentry->d_inode->i_sb->s_type->fs_flags
+			if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
 			      & FS_REQUIRES_DEV))
 				goto retry;
 			break;
@@ -454,7 +454,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 		fhp->fh_handle.ofh_dev =  old_encode_dev(ex_dev);
 		fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
 		fhp->fh_handle.ofh_xino =
-			ino_t_to_u32(exp->ex_dentry->d_inode->i_ino);
+			ino_t_to_u32(exp->ex_path.dentry->d_inode->i_ino);
 		fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
 		if (inode)
 			_fh_update_old(dentry, exp, &fhp->fh_handle);
@@ -465,7 +465,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 		datap = fhp->fh_handle.fh_auth+0;
 		fhp->fh_handle.fh_fsid_type = fsid_type;
 		mk_fsid(fsid_type, datap, ex_dev,
-			exp->ex_dentry->d_inode->i_ino,
+			exp->ex_path.dentry->d_inode->i_ino,
 			exp->ex_fsid, exp->ex_uuid);
 
 		len = key_len(fsid_type);
@@ -571,7 +571,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
 	case FSID_DEV:
 	case FSID_ENCODE_DEV:
 	case FSID_MAJOR_MINOR:
-		if (fhp->fh_export->ex_dentry->d_inode->i_sb->s_type->fs_flags
+		if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
 		    & FS_REQUIRES_DEV)
 			return FSIDSOURCE_DEV;
 		break;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 977a71f64e19..6cfc96a12483 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -41,7 +41,7 @@ static __be32
 nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp)
 {
 	if (err) return err;
-	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
+	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt,
 				    resp->fh.fh_dentry,
 				    &resp->stat));
 }
@@ -49,7 +49,7 @@ static __be32
 nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
 {
 	if (err) return err;
-	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
+	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt,
 				    resp->fh.fh_dentry,
 				    &resp->stat));
 }
@@ -164,7 +164,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
 				  &resp->count);
 
 	if (nfserr) return nfserr;
-	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
+	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt,
 				    resp->fh.fh_dentry,
 				    &resp->stat));
 }
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 61ad61743d94..afd08e2c90a5 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -207,7 +207,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct kstat stat;
-	vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat);
+	vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat);
 	return encode_fattr(rqstp, p, fhp, &stat);
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index cc75e4fcd02b..46f59d5365a0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -101,7 +101,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 {
 	struct svc_export *exp = *expp, *exp2 = NULL;
 	struct dentry *dentry = *dpp;
-	struct vfsmount *mnt = mntget(exp->ex_mnt);
+	struct vfsmount *mnt = mntget(exp->ex_path.mnt);
 	struct dentry *mounts = dget(dentry);
 	int err = 0;
 
@@ -156,15 +156,15 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (isdotent(name, len)) {
 		if (len==1)
 			dentry = dget(dparent);
-		else if (dparent != exp->ex_dentry) {
+		else if (dparent != exp->ex_path.dentry)
 			dentry = dget_parent(dparent);
-		} else  if (!EX_NOHIDE(exp))
+		else if (!EX_NOHIDE(exp))
 			dentry = dget(dparent); /* .. == . just like at / */
 		else {
 			/* checking mountpoint crossing is very different when stepping up */
 			struct svc_export *exp2 = NULL;
 			struct dentry *dp;
-			struct vfsmount *mnt = mntget(exp->ex_mnt);
+			struct vfsmount *mnt = mntget(exp->ex_path.mnt);
 			dentry = dget(dparent);
 			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
 				;
@@ -721,7 +721,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 
 		DQUOT_INIT(inode);
 	}
-	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags);
+	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
+				flags);
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
 out_nfserr:
@@ -1462,7 +1463,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 	if (!inode->i_op || !inode->i_op->readlink)
 		goto out;
 
-	touch_atime(fhp->fh_export->ex_mnt, dentry);
+	touch_atime(fhp->fh_export->ex_path.mnt, dentry);
 	/* N.B. Why does this call need a get_fs()??
 	 * Remove the set_fs and watch the fireworks:-) --okir
 	 */
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 3a1687251367..491dec1e37ca 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -84,9 +84,8 @@ struct svc_export {
 	struct cache_head	h;
 	struct auth_domain *	ex_client;
 	int			ex_flags;
-	struct vfsmount *	ex_mnt;
-	struct dentry *		ex_dentry;
-	char *			ex_path;
+	struct path		ex_path;
+	char			*ex_pathname;
 	uid_t			ex_anon_uid;
 	gid_t			ex_anon_gid;
 	int			ex_fsid;
-- 
cgit 


From e83aece3afad4d56cc01abe069d3519e851cd2de Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:38:41 -0800
Subject: Use struct path in struct svc_expkey

I'm embedding struct path into struct svc_expkey.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/export.c            | 30 +++++++++++++-----------------
 include/linux/nfsd/export.h |  3 +--
 2 files changed, 14 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7d7896814fa4..b59f8590af47 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -63,10 +63,8 @@ static void expkey_put(struct kref *ref)
 	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
 
 	if (test_bit(CACHE_VALID, &key->h.flags) &&
-	    !test_bit(CACHE_NEGATIVE, &key->h.flags)) {
-		dput(key->ek_dentry);
-		mntput(key->ek_mnt);
-	}
+	    !test_bit(CACHE_NEGATIVE, &key->h.flags))
+		path_put(&key->ek_path);
 	auth_domain_put(key->ek_client);
 	kfree(key);
 }
@@ -169,9 +167,8 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 			goto out;
 
 		dprintk("Found the path %s\n", buf);
-		key.ek_mnt = nd.path.mnt;
-		key.ek_dentry = nd.path.dentry;
-		
+		key.ek_path = nd.path;
+
 		ek = svc_expkey_update(&key, ek);
 		if (ek)
 			cache_put(&ek->h, &svc_expkey_cache);
@@ -206,7 +203,7 @@ static int expkey_show(struct seq_file *m,
 	if (test_bit(CACHE_VALID, &h->flags) && 
 	    !test_bit(CACHE_NEGATIVE, &h->flags)) {
 		seq_printf(m, " ");
-		seq_path(m, ek->ek_mnt, ek->ek_dentry, "\\ \t\n");
+		seq_path(m, ek->ek_path.mnt, ek->ek_path.dentry, "\\ \t\n");
 	}
 	seq_printf(m, "\n");
 	return 0;
@@ -243,8 +240,8 @@ static inline void expkey_update(struct cache_head *cnew,
 	struct svc_expkey *new = container_of(cnew, struct svc_expkey, h);
 	struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
 
-	new->ek_mnt = mntget(item->ek_mnt);
-	new->ek_dentry = dget(item->ek_dentry);
+	new->ek_path = item->ek_path;
+	path_get(&item->ek_path);
 }
 
 static struct cache_head *expkey_alloc(void)
@@ -814,8 +811,7 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	key.ek_client = clp;
 	key.ek_fsidtype = fsid_type;
 	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
-	key.ek_mnt = exp->ex_path.mnt;
-	key.ek_dentry = exp->ex_path.dentry;
+	key.ek_path = exp->ex_path;
 	key.h.expiry_time = NEVER;
 	key.h.flags = 0;
 
@@ -864,7 +860,7 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
 {
 	struct svc_export *exp, key;
 	int err;
-	
+
 	if (!clp)
 		return ERR_PTR(-ENOENT);
 
@@ -1036,9 +1032,9 @@ exp_export(struct nfsctl_export *nxp)
 	/* must make sure there won't be an ex_fsid clash */
 	if ((nxp->ex_flags & NFSEXP_FSID) &&
 	    (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) &&
-	    fsid_key->ek_mnt &&
-	    (fsid_key->ek_mnt != nd.path.mnt ||
-	     fsid_key->ek_dentry != nd.path.dentry))
+	    fsid_key->ek_path.mnt &&
+	    (fsid_key->ek_path.mnt != nd.path.mnt ||
+	     fsid_key->ek_path.dentry != nd.path.dentry))
 		goto finish;
 
 	if (!IS_ERR(exp)) {
@@ -1218,7 +1214,7 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type,
 	if (IS_ERR(ek))
 		return ERR_CAST(ek);
 
-	exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp);
+	exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp);
 	cache_put(&ek->h, &svc_expkey_cache);
 
 	if (IS_ERR(exp))
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 491dec1e37ca..5431512b2757 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -106,8 +106,7 @@ struct svc_expkey {
 	int			ek_fsidtype;
 	u32			ek_fsid[6];
 
-	struct vfsmount *	ek_mnt;
-	struct dentry *		ek_dentry;
+	struct path		ek_path;
 };
 
 #define EX_SECURE(exp)		(!((exp)->ex_flags & NFSEXP_INSECURE_PORT))
-- 
cgit 


From c32c2f63a9d6c953aaf168c0b2551da9734f76d2 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:38:43 -0800
Subject: d_path: Make seq_path() use a struct path argument

seq_path() is always called with a dentry and a vfsmount from a struct path.
Make seq_path() take it directly as an argument.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c          | 3 +--
 fs/namespace.c           | 6 ++++--
 fs/nfsd/export.c         | 4 ++--
 fs/proc/nommu.c          | 2 +-
 fs/proc/task_mmu.c       | 2 +-
 fs/seq_file.c            | 7 +++----
 include/linux/seq_file.h | 5 ++---
 mm/mempolicy.c           | 2 +-
 mm/swapfile.c            | 2 +-
 9 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5fc326d3970e..7da6ec244e15 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5197,8 +5197,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 				chunk_kb ? "KB" : "B");
 			if (bitmap->file) {
 				seq_printf(seq, ", file: ");
-				seq_path(seq, bitmap->file->f_path.mnt,
-					 bitmap->file->f_path.dentry," \t\n");
+				seq_path(seq, &bitmap->file->f_path, " \t\n");
 			}
 
 			seq_printf(seq, "\n");
diff --git a/fs/namespace.c b/fs/namespace.c
index eef57635ee07..7953c96a2071 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -408,10 +408,11 @@ static int show_vfsmnt(struct seq_file *m, void *v)
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
+	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 
 	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
 	seq_putc(m, ' ');
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	seq_path(m, &mnt_path, " \t\n\\");
 	seq_putc(m, ' ');
 	mangle(m, mnt->mnt_sb->s_type->name);
 	if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) {
@@ -443,6 +444,7 @@ struct seq_operations mounts_op = {
 static int show_vfsstat(struct seq_file *m, void *v)
 {
 	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
+	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 	int err = 0;
 
 	/* device */
@@ -454,7 +456,7 @@ static int show_vfsstat(struct seq_file *m, void *v)
 
 	/* mount point */
 	seq_puts(m, " mounted on ");
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	seq_path(m, &mnt_path, " \t\n\\");
 	seq_putc(m, ' ');
 
 	/* file system type */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b59f8590af47..4a85b40eef4f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -203,7 +203,7 @@ static int expkey_show(struct seq_file *m,
 	if (test_bit(CACHE_VALID, &h->flags) && 
 	    !test_bit(CACHE_NEGATIVE, &h->flags)) {
 		seq_printf(m, " ");
-		seq_path(m, ek->ek_path.mnt, ek->ek_path.dentry, "\\ \t\n");
+		seq_path(m, &ek->ek_path, "\\ \t\n");
 	}
 	seq_printf(m, "\n");
 	return 0;
@@ -649,7 +649,7 @@ static int svc_export_show(struct seq_file *m,
 		return 0;
 	}
 	exp = container_of(h, struct svc_export, h);
-	seq_path(m, exp->ex_path.mnt, exp->ex_path.dentry, " \t\n\\");
+	seq_path(m, &exp->ex_path, " \t\n\\");
 	seq_putc(m, '\t');
 	seq_escape(m, exp->ex_client->name, " \t\n\\");
 	seq_putc(m, '(');
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 5d9147b9d738..941e95114b5a 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -67,7 +67,7 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 		if (len < 1)
 			len = 1;
 		seq_printf(m, "%*c", len, ' ');
-		seq_path(m, file->f_path.mnt, file->f_path.dentry, "");
+		seq_path(m, &file->f_path, "");
 	}
 
 	seq_putc(m, '\n');
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4c4f99fb1bfc..49958cffbd8d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -271,7 +271,7 @@ static int show_map(struct seq_file *m, void *v)
 	 */
 	if (file) {
 		pad_len_spaces(m, len);
-		seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n");
+		seq_path(m, &file->f_path, "\n");
 	} else {
 		const char *name = arch_vma_name(vma);
 		if (!name) {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index ca71c115bdaa..8d862907f060 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -342,13 +342,12 @@ int seq_printf(struct seq_file *m, const char *f, ...)
 }
 EXPORT_SYMBOL(seq_printf);
 
-int seq_path(struct seq_file *m,
-	     struct vfsmount *mnt, struct dentry *dentry,
-	     char *esc)
+int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
 	if (m->count < m->size) {
 		char *s = m->buf + m->count;
-		char *p = d_path(dentry, mnt, s, m->size - m->count);
+		char *p = d_path(path->dentry, path->mnt, s,
+				 m->size - m->count);
 		if (!IS_ERR(p)) {
 			while (s <= p) {
 				char c = *p++;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 648dfeb444db..67c2563961f3 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -8,8 +8,7 @@
 
 struct seq_operations;
 struct file;
-struct vfsmount;
-struct dentry;
+struct path;
 struct inode;
 
 struct seq_file {
@@ -42,7 +41,7 @@ int seq_puts(struct seq_file *m, const char *s);
 int seq_printf(struct seq_file *, const char *, ...)
 	__attribute__ ((format (printf,2,3)));
 
-int seq_path(struct seq_file *, struct vfsmount *, struct dentry *, char *);
+int seq_path(struct seq_file *, struct path *, char *);
 
 int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
 int single_release(struct inode *, struct file *);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8d246c3b340f..6c7ba1a63d23 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1996,7 +1996,7 @@ int show_numa_map(struct seq_file *m, void *v)
 
 	if (file) {
 		seq_printf(m, " file=");
-		seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n\t= ");
+		seq_path(m, &file->f_path, "\n\t= ");
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_printf(m, " heap");
 	} else if (vma->vm_start <= mm->start_stack &&
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 02ccab5ad9d9..2da149cfc9ac 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1394,7 +1394,7 @@ static int swap_show(struct seq_file *swap, void *v)
 	}
 
 	file = ptr->swap_file;
-	len = seq_path(swap, file->f_path.mnt, file->f_path.dentry, " \t\n\\");
+	len = seq_path(swap, &file->f_path, " \t\n\\");
 	seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
 		       len < 40 ? 40 - len : 1, " ",
 		       S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
-- 
cgit 


From cf28b4863f9ee8f122e8ff3ac0d403e07ba9c6d9 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Thu, 14 Feb 2008 19:38:44 -0800
Subject: d_path: Make d_path() use a struct path

d_path() is used on a <dentry,vfsmount> pair.  Lets use a struct path to
reflect this.

[akpm@linux-foundation.org: fix build in mm/memory.c]
Signed-off-by: Jan Blunck <jblunck@suse.de>
Acked-by: Bryan Wu <bryan.wu@analog.com>
Acked-by: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/blackfin/kernel/traps.c      | 12 +++++-------
 drivers/md/bitmap.c               |  8 +-------
 drivers/usb/gadget/file_storage.c |  8 +++-----
 fs/compat_ioctl.c                 |  2 +-
 fs/dcache.c                       | 12 +++++-------
 fs/dcookies.c                     |  2 +-
 fs/nfsd/export.c                  |  2 +-
 fs/proc/base.c                    |  2 +-
 fs/seq_file.c                     |  3 +--
 include/linux/dcache.h            |  5 +++--
 kernel/audit.c                    |  2 +-
 mm/memory.c                       |  2 +-
 12 files changed, 24 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 58717cb19707..56a67ab698c7 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -126,15 +126,13 @@ static void decode_address(char *buf, unsigned long address)
 			struct vm_area_struct *vma = vml->vma;
 
 			if (address >= vma->vm_start && address < vma->vm_end) {
+				char _tmpbuf[256];
 				char *name = p->comm;
 				struct file *file = vma->vm_file;
-				if (file) {
-					char _tmpbuf[256];
-					name = d_path(file->f_dentry,
-					              file->f_vfsmnt,
-					              _tmpbuf,
-					              sizeof(_tmpbuf));
-				}
+
+				if (file)
+					name = d_path(&file->f_path, _tmpbuf,
+						      sizeof(_tmpbuf));
 
 				/* FLAT does not have its text aligned to the start of
 				 * the map while FDPIC ELF does ...
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index a0585fb6da94..7aeceedcf7d4 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -206,16 +206,10 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
 /* copy the pathname of a file to a buffer */
 char *file_path(struct file *file, char *buf, int count)
 {
-	struct dentry *d;
-	struct vfsmount *v;
-
 	if (!buf)
 		return NULL;
 
-	d = file->f_path.dentry;
-	v = file->f_path.mnt;
-
-	buf = d_path(d, v, buf, count);
+	buf = d_path(&file->f_path, buf, count);
 
 	return IS_ERR(buf) ? NULL : buf;
 }
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 3301167d4f2a..017a196d041f 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -3563,8 +3563,7 @@ static ssize_t show_file(struct device *dev, struct device_attribute *attr,
 
 	down_read(&fsg->filesem);
 	if (backing_file_is_open(curlun)) {	// Get the complete pathname
-		p = d_path(curlun->filp->f_path.dentry,
-				curlun->filp->f_path.mnt, buf, PAGE_SIZE - 1);
+		p = d_path(&curlun->filp->f_path, buf, PAGE_SIZE - 1);
 		if (IS_ERR(p))
 			rc = PTR_ERR(p);
 		else {
@@ -3981,9 +3980,8 @@ static int __init fsg_bind(struct usb_gadget *gadget)
 		if (backing_file_is_open(curlun)) {
 			p = NULL;
 			if (pathbuf) {
-				p = d_path(curlun->filp->f_path.dentry,
-					curlun->filp->f_path.mnt,
-					pathbuf, PATH_MAX);
+				p = d_path(&curlun->filp->f_path,
+					   pathbuf, PATH_MAX);
 				if (IS_ERR(p))
 					p = NULL;
 			}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index ee32c0eac7c1..c6e72aebd16b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2853,7 +2853,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
 	/* find the name of the device. */
 	path = (char *)__get_free_page(GFP_KERNEL);
 	if (path) {
-		fn = d_path(filp->f_path.dentry, filp->f_path.mnt, path, PAGE_SIZE);
+		fn = d_path(&filp->f_path, path, PAGE_SIZE);
 		if (IS_ERR(fn))
 			fn = "?";
 	}
diff --git a/fs/dcache.c b/fs/dcache.c
index 170efbcb1a9b..7b4b080219f8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1845,8 +1845,7 @@ Elong:
 
 /**
  * d_path - return the path of a dentry
- * @dentry: dentry to report
- * @vfsmnt: vfsmnt to which the dentry belongs
+ * @path: path to report
  * @buf: buffer to return value in
  * @buflen: buffer length
  *
@@ -1857,8 +1856,7 @@ Elong:
  *
  * "buflen" should be positive. Caller holds the dcache_lock.
  */
-char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
-	     char *buf, int buflen)
+char *d_path(struct path *path, char *buf, int buflen)
 {
 	char *res;
 	struct path root;
@@ -1870,15 +1868,15 @@ char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
 	 * user wants to identify the object in /proc/pid/fd/.  The little hack
 	 * below allows us to generate a name for these objects on demand:
 	 */
-	if (dentry->d_op && dentry->d_op->d_dname)
-		return dentry->d_op->d_dname(dentry, buf, buflen);
+	if (path->dentry->d_op && path->dentry->d_op->d_dname)
+		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 
 	read_lock(&current->fs->lock);
 	root = current->fs->root;
 	path_get(&current->fs->root);
 	read_unlock(&current->fs->lock);
 	spin_lock(&dcache_lock);
-	res = __d_path(dentry, vfsmnt, &root, buf, buflen);
+	res = __d_path(path->dentry, path->mnt, &root, buf, buflen);
 	spin_unlock(&dcache_lock);
 	path_put(&root);
 	return res;
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 13c29f1f711f..855d4b1d619a 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -171,7 +171,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len)
 		goto out;
 
 	/* FIXME: (deleted) ? */
-	path = d_path(dcs->path.dentry, dcs->path.mnt, kbuf, PAGE_SIZE);
+	path = d_path(&dcs->path, kbuf, PAGE_SIZE);
 
 	if (IS_ERR(path)) {
 		err = PTR_ERR(path);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 4a85b40eef4f..8a6f7c924c75 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -345,7 +345,7 @@ static void svc_export_request(struct cache_detail *cd,
 	char *pth;
 
 	qword_add(bpp, blen, exp->ex_client->name);
-	pth = d_path(exp->ex_path.dentry, exp->ex_path.mnt, *bpp, *blen);
+	pth = d_path(&exp->ex_path, *bpp, *blen);
 	if (IS_ERR(pth)) {
 		/* is this correct? */
 		(*bpp)[0] = '\n';
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 47338d92db51..88f8edf18258 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1185,7 +1185,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
 	if (!tmp)
 		return -ENOMEM;
 
-	pathname = d_path(path->dentry, path->mnt, tmp, PAGE_SIZE);
+	pathname = d_path(path, tmp, PAGE_SIZE);
 	len = PTR_ERR(pathname);
 	if (IS_ERR(pathname))
 		goto out;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 8d862907f060..853770274f20 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -346,8 +346,7 @@ int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
 	if (m->count < m->size) {
 		char *s = m->buf + m->count;
-		char *p = d_path(path->dentry, path->mnt, s,
-				 m->size - m->count);
+		char *p = d_path(path, s, m->size - m->count);
 		if (!IS_ERR(p)) {
 			while (s <= p) {
 				char c = *p++;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c2c153f97e8f..6bd646096fa6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -10,6 +10,7 @@
 #include <linux/rcupdate.h>
 
 struct nameidata;
+struct path;
 struct vfsmount;
 
 /*
@@ -300,8 +301,8 @@ extern int d_validate(struct dentry *, struct dentry *);
  */
 extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
 
-extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
-  
+extern char *d_path(struct path *, char *, int);
+
 /* Allocation counts.. */
 
 /**
diff --git a/kernel/audit.c b/kernel/audit.c
index 783e65701247..2eeea9a14240 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1325,7 +1325,7 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
 		audit_log_format(ab, "<no memory>");
 		return;
 	}
-	p = d_path(path->dentry, path->mnt, pathname, PATH_MAX+11);
+	p = d_path(path, pathname, PATH_MAX+11);
 	if (IS_ERR(p)) { /* Should never happen since we send PATH_MAX */
 		/* FIXME: can we save some information here? */
 		audit_log_format(ab, "<too long>");
diff --git a/mm/memory.c b/mm/memory.c
index 717aa0e3be2d..e7a6dcacefc1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2719,7 +2719,7 @@ void print_vma_addr(char *prefix, unsigned long ip)
 		if (buf) {
 			char *p, *s;
 
-			p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE);
+			p = d_path(&f->f_path, buf, PAGE_SIZE);
 			if (IS_ERR(p))
 				p = "?";
 			s = strrchr(p, '/');
-- 
cgit